author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2020-10-28 18:41:24 +0100
---|---|---
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2020-10-28 18:41:24 +0100
commit | bf6dad60c338a42a7fb85f7b2a5870c0fb2e20f8 (patch) |
tree | e513781ef717465e7db0358e987a5a6cbef5665c | /gcc/config
parent | 0c261d5b5c931d9e9214d06531bdc7e9e16aeaab (diff) |
parent | 47d13acbda9a5d8eb57ff169ba74857cd54108e4 (diff) |
Merge branch 'master' into devel/coarray_native.
Merge into devel/coarray_native to prepare for later merging of
coarray_native with master.
Diffstat (limited to 'gcc/config')
147 files changed, 6989 insertions, 3292 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 4f33dd9..732a4dc 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -2024,7 +2024,7 @@ aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target) return target; } -/* Expand an expression EXP as fpsr or cpsr setter (depending on +/* Expand an expression EXP as fpsr or fpcr setter (depending on UNSPEC) using MODE. */ static void aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp) @@ -2034,6 +2034,18 @@ aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp) emit_insn (gen_aarch64_set (unspec, mode, op)); } +/* Expand a fpsr or fpcr getter (depending on UNSPEC) using MODE. + Return the target. */ +static rtx +aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode, + rtx target) +{ + expand_operand op; + create_output_operand (&op, target, mode); + expand_insn (icode, 1, &op); + return op.value; +} + /* Expand an expression EXP that calls built-in function FCODE, with result going to TARGET if that's convenient. IGNORE is true if the result of the builtin is ignored. */ @@ -2048,26 +2060,26 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, switch (fcode) { case AARCH64_BUILTIN_GET_FPCR: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, SImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPCR: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp); return target; case AARCH64_BUILTIN_GET_FPSR: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, SImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi, + SImode, target); case AARCH64_BUILTIN_SET_FPSR: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp); return target; case AARCH64_BUILTIN_GET_FPCR64: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPCR, DImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi, + DImode, target); case AARCH64_BUILTIN_SET_FPCR64: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp); return target; case AARCH64_BUILTIN_GET_FPSR64: - emit_insn (gen_aarch64_get (UNSPECV_GET_FPSR, DImode, target)); - return target; + return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi, + DImode, target); case AARCH64_BUILTIN_SET_FPSR64: aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp); return target; @@ -2079,20 +2091,13 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, arg0 = CALL_EXPR_ARG (exp, 0); op0 = force_reg (Pmode, expand_normal (arg0)); - if (!target) - target = gen_reg_rtx (Pmode); - else - target = force_reg (Pmode, target); - - emit_move_insn (target, op0); - if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI) { rtx lr = gen_rtx_REG (Pmode, R30_REGNUM); icode = CODE_FOR_xpaclri; emit_move_insn (lr, op0); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, lr); + return lr; } else { @@ -2122,20 +2127,18 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, emit_move_insn (x17_reg, op0); emit_move_insn (x16_reg, op1); emit_insn (GEN_FCN (icode) ()); - emit_move_insn (target, x17_reg); + return x17_reg; } - return target; - case AARCH64_JSCVT: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = force_reg (DFmode, expand_normal (arg0)); - if (!target) - target = gen_reg_rtx (SImode); - else - target = force_reg 
(SImode, target); - emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0)); - return target; + { + expand_operand ops[2]; + create_output_operand (&ops[0], target, SImode); + op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); + create_input_operand (&ops[1], op0, DFmode); + expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops); + return ops[0].value; + } case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index f30ff35..3aa13f6 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -103,8 +103,11 @@ AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1) AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1) +AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1) +AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1) AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) +AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1) AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1) @@ -133,11 +136,15 @@ AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_ /* ARMv8.4-A Architecture Processors. */ /* Arm ('A') cores. 
*/ -AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversen1, 0x41, 0xd40, -1) +AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) +AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1) /* Qualcomm ('Q') cores. */ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1) +/* Armv8.5-A Architecture Processors. */ +AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1) + /* ARMv8-A big.LITTLE implementations. */ AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1) diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 8257df9..ca08642 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -155,7 +155,7 @@ AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") /* Enabling/Disabling "rng" only changes "rng". */ -AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "") +AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "rng") /* Enabling/Disabling "memtag" only changes "memtag". */ AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index c7e828d..7a34c84 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -136,6 +136,25 @@ enum aarch64_addr_query_type { ADDR_QUERY_ANY }; +/* Enumerates values that can be arbitrarily mixed into a calculation + in order to make the result of the calculation unique to its use case. + + AARCH64_SALT_SSP_SET + AARCH64_SALT_SSP_TEST + Used when calculating the address of the stack protection canary value. + There is a separate value for setting and testing the canary, meaning + that these two operations produce unique addresses: they are different + from each other, and from all other address calculations. + + The main purpose of this is to prevent the SET address being spilled + to the stack and reloaded for the TEST, since that would give an + attacker the opportunity to change the address of the expected + canary value. */ +enum aarch64_salt_type { + AARCH64_SALT_SSP_SET, + AARCH64_SALT_SSP_TEST +}; + /* A set of tuning parameters contains references to size and time cost models and vectors for address cost calculations, register move costs and memory move costs. 
*/ @@ -608,9 +627,9 @@ opt_machine_mode aarch64_ptrue_all_mode (rtx); rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx); void aarch64_expand_mov_immediate (rtx, rtx); +rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type); rtx aarch64_ptrue_reg (machine_mode); rtx aarch64_pfalse_reg (machine_mode); -bool aarch64_sve_pred_dominates_p (rtx *, rtx); bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); void aarch64_emit_sve_pred_move (rtx, rtx, rtx); void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index d1b2110..5bc596d 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -45,8 +45,8 @@ BUILTIN_VDC (COMBINE, combine, 0, ALL) VAR1 (COMBINEP, combine, 0, ALL, di) - BUILTIN_VB (BINOP, pmul, 0, ALL) - BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, ALL) + BUILTIN_VB (BINOP, pmul, 0, NONE) + BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0, FP) BUILTIN_VHSDF_DF (UNOP, sqrt, 2, ALL) BUILTIN_VD_BHSI (BINOP, addp, 0, NONE) VAR1 (UNOP, addp, 0, NONE, di) @@ -70,26 +70,26 @@ BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0, ALL) /* Implemented by aarch64_get_dreg<VSTRUCT:mode><VDC:mode>. */ - BUILTIN_VDC (GETREG, get_dregoi, 0, ALL) - BUILTIN_VDC (GETREG, get_dregci, 0, ALL) - BUILTIN_VDC (GETREG, get_dregxi, 0, ALL) - VAR1 (GETREGP, get_dregoi, 0, ALL, di) - VAR1 (GETREGP, get_dregci, 0, ALL, di) - VAR1 (GETREGP, get_dregxi, 0, ALL, di) + BUILTIN_VDC (GETREG, get_dregoi, 0, AUTO_FP) + BUILTIN_VDC (GETREG, get_dregci, 0, AUTO_FP) + BUILTIN_VDC (GETREG, get_dregxi, 0, AUTO_FP) + VAR1 (GETREGP, get_dregoi, 0, AUTO_FP, di) + VAR1 (GETREGP, get_dregci, 0, AUTO_FP, di) + VAR1 (GETREGP, get_dregxi, 0, AUTO_FP, di) /* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */ - BUILTIN_VQ (GETREG, get_qregoi, 0, ALL) - BUILTIN_VQ (GETREG, get_qregci, 0, ALL) - BUILTIN_VQ (GETREG, get_qregxi, 0, ALL) - VAR1 (GETREGP, get_qregoi, 0, ALL, v2di) - VAR1 (GETREGP, get_qregci, 0, ALL, v2di) - VAR1 (GETREGP, get_qregxi, 0, ALL, v2di) + BUILTIN_VQ (GETREG, get_qregoi, 0, AUTO_FP) + BUILTIN_VQ (GETREG, get_qregci, 0, AUTO_FP) + BUILTIN_VQ (GETREG, get_qregxi, 0, AUTO_FP) + VAR1 (GETREGP, get_qregoi, 0, AUTO_FP, v2di) + VAR1 (GETREGP, get_qregci, 0, AUTO_FP, v2di) + VAR1 (GETREGP, get_qregxi, 0, AUTO_FP, v2di) /* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */ - BUILTIN_VQ (SETREG, set_qregoi, 0, ALL) - BUILTIN_VQ (SETREG, set_qregci, 0, ALL) - BUILTIN_VQ (SETREG, set_qregxi, 0, ALL) - VAR1 (SETREGP, set_qregoi, 0, ALL, v2di) - VAR1 (SETREGP, set_qregci, 0, ALL, v2di) - VAR1 (SETREGP, set_qregxi, 0, ALL, v2di) + BUILTIN_VQ (SETREG, set_qregoi, 0, AUTO_FP) + BUILTIN_VQ (SETREG, set_qregci, 0, AUTO_FP) + BUILTIN_VQ (SETREG, set_qregxi, 0, AUTO_FP) + VAR1 (SETREGP, set_qregoi, 0, AUTO_FP, v2di) + VAR1 (SETREGP, set_qregci, 0, AUTO_FP, v2di) + VAR1 (SETREGP, set_qregxi, 0, AUTO_FP, v2di) /* Implemented by aarch64_ld1x2<VQ:mode>. */ BUILTIN_VQ (LOADSTRUCT, ld1x2, 0, ALL) /* Implemented by aarch64_ld1x2<VDC:mode>. */ @@ -159,7 +159,7 @@ BUILTIN_VQN (TERNOP, raddhn2, 0, NONE) BUILTIN_VQN (TERNOP, rsubhn2, 0, NONE) - BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0, ALL) + BUILTIN_VSQN_HSDI (UNOPUS, sqmovun, 0, ALL) /* Implemented by aarch64_<sur>qmovn<mode>. 
*/ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0, ALL) BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0, ALL) @@ -189,11 +189,11 @@ BUILTIN_VQ_HSI (TERNOP, sqdmlal2_n, 0, ALL) BUILTIN_VQ_HSI (TERNOP, sqdmlsl2_n, 0, ALL) - BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, ALL) - BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, ALL) + BUILTIN_VD_BHSI (BINOP, intrinsic_vec_smult_lo_, 0, NONE) + BUILTIN_VD_BHSI (BINOPU, intrinsic_vec_umult_lo_, 0, NONE) - BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10, ALL) - BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10, ALL) + BUILTIN_VQW (BINOP, vec_widen_smult_hi_, 10, NONE) + BUILTIN_VQW (BINOPU, vec_widen_umult_hi_, 10, NONE) BUILTIN_VD_HSI (TERNOP_LANE, vec_smult_lane_, 0, ALL) BUILTIN_VD_HSI (QUADOP_LANE, vec_smlal_lane_, 0, ALL) @@ -246,10 +246,10 @@ BUILTIN_VHSDF (BINOP, fcadd270, 0, FP) /* Implemented by aarch64_fcmla{_lane}{q}<rot><mode>. */ - BUILTIN_VHSDF (TERNOP, fcmla0, 0, ALL) - BUILTIN_VHSDF (TERNOP, fcmla90, 0, ALL) - BUILTIN_VHSDF (TERNOP, fcmla180, 0, ALL) - BUILTIN_VHSDF (TERNOP, fcmla270, 0, ALL) + BUILTIN_VHSDF (TERNOP, fcmla0, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla90, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla180, 0, FP) + BUILTIN_VHSDF (TERNOP, fcmla270, 0, FP) BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane0, 0, ALL) BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane90, 0, ALL) BUILTIN_VHSDF (QUADOP_LANE_PAIR, fcmla_lane180, 0, ALL) @@ -338,12 +338,11 @@ BUILTIN_VHSDF (UNOP, nearbyint, 2, FP) BUILTIN_VHSDF (UNOP, rint, 2, FP) BUILTIN_VHSDF (UNOP, round, 2, FP) - BUILTIN_VHSDF_DF (UNOP, frintn, 2, FP) + BUILTIN_VHSDF_HSDF (UNOP, frintn, 2, FP) VAR1 (UNOP, btrunc, 2, FP, hf) VAR1 (UNOP, ceil, 2, FP, hf) VAR1 (UNOP, floor, 2, FP, hf) - VAR1 (UNOP, frintn, 2, FP, hf) VAR1 (UNOP, nearbyint, 2, FP, hf) VAR1 (UNOP, rint, 2, FP, hf) VAR1 (UNOP, round, 2, FP, hf) @@ -535,8 +534,8 @@ VAR1 (TERNOPU, crypto_sha256su1, 0, ALL, v4si) /* Implemented by aarch64_crypto_pmull<mode>. */ - VAR1 (BINOPP, crypto_pmull, 0, ALL, di) - VAR1 (BINOPP, crypto_pmull, 0, ALL, v2di) + VAR1 (BINOPP, crypto_pmull, 0, NONE, di) + VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di) /* Implemented by aarch64_tbl3<mode>. */ VAR1 (BINOP, tbl3, 0, ALL, v8qi) @@ -667,15 +666,15 @@ BUILTIN_VQ_I (TERNOP, bcaxq, 4, ALL) /* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>. */ - VAR1 (TERNOP, fmlal_low, 0, ALL, v2sf) - VAR1 (TERNOP, fmlsl_low, 0, ALL, v2sf) - VAR1 (TERNOP, fmlalq_low, 0, ALL, v4sf) - VAR1 (TERNOP, fmlslq_low, 0, ALL, v4sf) + VAR1 (TERNOP, fmlal_low, 0, FP, v2sf) + VAR1 (TERNOP, fmlsl_low, 0, FP, v2sf) + VAR1 (TERNOP, fmlalq_low, 0, FP, v4sf) + VAR1 (TERNOP, fmlslq_low, 0, FP, v4sf) /* Implemented by aarch64_fml<f16mac1>l<f16quad>_high<mode>. */ - VAR1 (TERNOP, fmlal_high, 0, ALL, v2sf) - VAR1 (TERNOP, fmlsl_high, 0, ALL, v2sf) - VAR1 (TERNOP, fmlalq_high, 0, ALL, v4sf) - VAR1 (TERNOP, fmlslq_high, 0, ALL, v4sf) + VAR1 (TERNOP, fmlal_high, 0, FP, v2sf) + VAR1 (TERNOP, fmlsl_high, 0, FP, v2sf) + VAR1 (TERNOP, fmlalq_high, 0, FP, v4sf) + VAR1 (TERNOP, fmlslq_high, 0, FP, v4sf) /* Implemented by aarch64_fml<f16mac1>l_lane_lowv2sf. 
*/ VAR1 (QUADOP_LANE, fmlal_lane_low, 0, ALL, v2sf) VAR1 (QUADOP_LANE, fmlsl_lane_low, 0, ALL, v2sf) @@ -713,20 +712,20 @@ VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, ALL, v2sf, v4sf) /* Implemented by aarch64_bfmmlaqv4sf */ - VAR1 (TERNOP, bfmmlaq, 0, ALL, v4sf) + VAR1 (TERNOP, bfmmlaq, 0, AUTO_FP, v4sf) /* Implemented by aarch64_bfmlal<bt>{_lane{q}}v4sf */ - VAR1 (TERNOP, bfmlalb, 0, ALL, v4sf) - VAR1 (TERNOP, bfmlalt, 0, ALL, v4sf) + VAR1 (TERNOP, bfmlalb, 0, FP, v4sf) + VAR1 (TERNOP, bfmlalt, 0, FP, v4sf) VAR1 (QUADOP_LANE, bfmlalb_lane, 0, ALL, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane, 0, ALL, v4sf) VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, ALL, v4sf) VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, ALL, v4sf) /* Implemented by aarch64_simd_<sur>mmlav16qi. */ - VAR1 (TERNOP, simd_smmla, 0, ALL, v16qi) - VAR1 (TERNOPU, simd_ummla, 0, ALL, v16qi) - VAR1 (TERNOP_SSUS, simd_usmmla, 0, ALL, v16qi) + VAR1 (TERNOP, simd_smmla, 0, NONE, v16qi) + VAR1 (TERNOPU, simd_ummla, 0, NONE, v16qi) + VAR1 (TERNOP_SSUS, simd_usmmla, 0, NONE, v16qi) /* Implemented by aarch64_bfcvtn{q}{2}<mode> */ VAR1 (UNOP, bfcvtn, 0, ALL, v4bf) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index cd79aba..31a8c5a 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -464,6 +464,95 @@ ;; ;; - MNEMONIC is the mnemonic of the associated SVE instruction. ;; +;; For (3) and (4), we combine these operations with an UNSPEC_SEL +;; that selects between the result of the FP operation and the "else" +;; value. (This else value is a merge input for _m ACLE functions +;; and zero for _z ACLE functions.) The outer pattern then has the form: +;; +;; (unspec [pred fp_operation else_value] UNSPEC_SEL) +;; +;; This means that the patterns for (3) and (4) have two predicates: +;; one for the FP operation itself and one for the UNSPEC_SEL. +;; This pattern is equivalent to the result of combining an instance +;; of (1) or (2) with a separate vcond instruction, so these patterns +;; are useful as combine targets too. +;; +;; However, in the combine case, the instructions that we want to +;; combine might use different predicates. Then: +;; +;; - Some of the active lanes of the FP operation might be discarded +;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, +;; even for SVE_STRICT_GP, since the operations on those lanes are +;; effectively dead code. +;; +;; - Some of the inactive lanes of the FP operation might be selected +;; by the UNSPEC_SEL, giving unspecified values for those lanes. +;; SVE_RELAXED_GP lets us extend the FP operation to cover these +;; extra lanes, but SVE_STRICT_GP does not. +;; +;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation +;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. +;; This typically leads to patterns like: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_operand N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_<MNEMONIC>) +;; ...]) +;; +;; where operand N is allowed to be anything. These instructions then +;; have rewrite rules to replace operand N with operand 1, which gives the +;; instructions a canonical form and means that the original operand N is +;; not kept live unnecessarily. +;; +;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be +;; a subset of the FP operation predicate. This case isn't interesting +;; for FP operations that have an all-true predicate, since such operations +;; use SVE_RELAXED_GP instead. 
And it is not possible for instruction +;; conditions to track the subset relationship for arbitrary registers. +;; So in practice, the only useful case for SVE_STRICT_GP is the one +;; in which the predicates match: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (unspec [(match_dup 1) +;; (const_int SVE_STRICT_GP) +;; ...] +;; UNSPEC_COND_<MNEMONIC>) +;; ...]) +;; +;; This pattern would also be correct for SVE_RELAXED_GP, but it would +;; be redundant with the one above. However, if the combine pattern +;; has multiple FP operations, using a match_operand allows combinations +;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided +;; that the predicates are the same: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_dup 1) +;; (match_operand:SI N "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_<MNEMONIC1>) +;; (unspec [(match_dup 1) +;; (match_operand:SI M "aarch64_sve_gp_strictness") +;; ...] +;; UNSPEC_COND_<MNEMONIC2>) ...) +;; ...]) +;; +;; The fully-relaxed version of this pattern is: +;; +;; (unspec [(match_operand 1 "register_operand" "Upl") +;; (... +;; (unspec [(match_operand:SI N) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_<MNEMONIC1>) +;; (unspec [(match_operand:SI M) +;; (const_int SVE_RELAXED_GP) +;; ...] +;; UNSPEC_COND_<MNEMONIC2>) ...) +;; ...]) +;; ;; ------------------------------------------------------------------------- ;; ---- Note on FFR handling ;; ------------------------------------------------------------------------- @@ -3304,18 +3393,18 @@ ) ;; Predicated floating-point unary arithmetic, merging with the first input. -(define_insn_and_rewrite "*cond_<optab><mode>_2" +(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 3) - (match_operand:SI 4 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] SVE_COND_FP_UNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" + "TARGET_SVE" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" @@ -3326,6 +3415,24 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] + SVE_COND_FP_UNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> + movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point unary arithmetic, merging with an independent ;; value. ;; @@ -3334,20 +3441,18 @@ ;; which is handled above rather than here. Marking all the alternatives ;; as earlyclobber helps to make the instruction more regular to the ;; register allocator. 
-(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FP_UNARY) (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> @@ -3359,6 +3464,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FP_UNARY) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> + movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" + [(set_attr "movprfx" "*,yes,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Square root ;; ------------------------------------------------------------------------- @@ -4649,19 +4773,19 @@ ;; Predicated floating-point binary operations that take an integer as their ;; second operand, with inactive lanes coming from the first operand. 
-(define_insn_and_rewrite "*cond_<optab><mode>_2" +(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] SVE_COND_FP_BINARY_INT) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" @@ -4672,24 +4796,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY_INT) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point binary operations that take an integer as ;; their second operand, with the values of inactive lanes being distinct ;; from the other inputs. -(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] SVE_COND_FP_BINARY_INT) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> @@ -4713,6 +4854,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") + (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] + SVE_COND_FP_BINARY_INT) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, 
%0.<Vetype>, %3.<Vetype> + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] General binary arithmetic corresponding to rtx codes ;; ------------------------------------------------------------------------- @@ -4813,19 +4983,19 @@ ) ;; Predicated floating-point operations, merging with the first input. -(define_insn_and_rewrite "*cond_<optab><mode>_2" +(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FP_BINARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" @@ -4836,20 +5006,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FP_BINARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Same for operations that take a 1-bit constant. 
-(define_insn_and_rewrite "*cond_<optab><mode>_2_const" +(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" @@ -4860,20 +5049,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + SVE_COND_FP_BINARY_I1) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with the second input. -(define_insn_and_rewrite "*cond_<optab><mode>_3" +(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] SVE_COND_FP_BINARY) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" @@ -4884,14 +5092,33 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + SVE_COND_FP_BINARY) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> + movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point operations, merging with an independent value. 
-(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] SVE_COND_FP_BINARY) @@ -4899,8 +5126,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> @@ -4925,22 +5151,52 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + SVE_COND_FP_BINARY) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Same for operations that take a 1-bit constant. 
-(define_insn_and_rewrite "*cond_<optab><mode>_any_const" +(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] SVE_COND_FP_BINARY_I1) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 @@ -4963,6 +5219,34 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") + (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] + SVE_COND_FP_BINARY_I1) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Addition ;; ------------------------------------------------------------------------- @@ -5001,19 +5285,19 @@ ;; Predicated floating-point addition of a constant, merging with the ;; first input. 
-(define_insn_and_rewrite "*cond_add<mode>_2_const" +(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 @@ -5026,23 +5310,42 @@ [(set_attr "movprfx" "*,*,yes,yes")] ) +(define_insn "*cond_add<mode>_2_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 + movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3" + [(set_attr "movprfx" "*,*,yes,yes")] +) + ;; Predicated floating-point addition of a constant, merging with an ;; independent value. -(define_insn_and_rewrite "*cond_add<mode>_any_const" +(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] UNSPEC_COND_FADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 @@ -5068,6 +5371,37 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_add<mode>_any_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") + (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] + UNSPEC_COND_FADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0.<Vetype>, 
%1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 + # + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Register merging forms are handled through SVE_COND_FP_BINARY. ;; ------------------------------------------------------------------------- @@ -5110,19 +5444,19 @@ ) ;; Predicated FCADD, merging with the first input. -(define_insn_and_rewrite "*cond_<optab><mode>_2" +(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] SVE_COND_FCADD) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" @@ -5133,22 +5467,39 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + SVE_COND_FCADD) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> + movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCADD, merging with an independent value. 
-(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] SVE_COND_FCADD) (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[2], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> @@ -5172,6 +5523,35 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] + SVE_COND_FCADD) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], + operands[4], operands[1])); + operands[4] = operands[2] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Subtraction ;; ------------------------------------------------------------------------- @@ -5209,19 +5589,19 @@ ;; Predicated floating-point subtraction from a constant, merging with the ;; second input. 
-(define_insn_and_rewrite "*cond_sub<mode>_3_const" +(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE" "@ fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" @@ -5232,12 +5612,28 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_sub<mode>_3_const_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 + movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point subtraction from a constant, merging with an ;; independent value. -;; -;; The subtraction predicate and the merge predicate are allowed to be -;; different. -(define_insn_and_rewrite "*cond_sub<mode>_relaxed_const" +(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") @@ -5272,11 +5668,7 @@ [(set_attr "movprfx" "yes")] ) -;; Predicated floating-point subtraction from a constant, merging with an -;; independent value. -;; -;; The subtraction predicate and the merge predicate must be the same. -(define_insn_and_rewrite "*cond_sub<mode>_strict_const" +(define_insn_and_rewrite "*cond_sub<mode>_const_strict" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") @@ -5329,19 +5721,19 @@ ) ;; Predicated floating-point absolute difference. 
-(define_insn_and_rewrite "*aarch64_pred_abd<mode>" +(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (match_operand:SI 4 "aarch64_sve_gp_strictness") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" @@ -5352,6 +5744,25 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_pred_abd<mode>_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS))] + "TARGET_SVE" + "@ + fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + (define_expand "@aarch64_cond_abd<mode>" [(set (match_operand:SVE_FULL_F 0 "register_operand") (unspec:SVE_FULL_F @@ -5376,82 +5787,124 @@ ;; Predicated floating-point absolute difference, merging with the first ;; input. -(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2" +(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" "&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd<mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + 
fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with the second ;; input. -(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3" +(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + [(match_operand 5) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] UNSPEC_COND_FSUB)] UNSPEC_COND_FABS) (match_dup 3)] UNSPEC_SEL))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE" "@ fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" "&& (!rtx_equal_p (operands[1], operands[4]) - || !rtx_equal_p (operands[1], operands[6]))" + || !rtx_equal_p (operands[1], operands[5]))" { operands[4] = copy_rtx (operands[1]); - operands[6] = copy_rtx (operands[1]); + operands[5] = copy_rtx (operands[1]); } [(set_attr "movprfx" "*,yes")] ) +(define_insn "*aarch64_cond_abd<mode>_3_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 4 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_dup 3)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> + movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point absolute difference, merging with an ;; independent value. 
-(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any" +(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] UNSPEC_COND_FSUB)] @@ -5460,9 +5913,7 @@ UNSPEC_SEL))] "TARGET_SVE && !rtx_equal_p (operands[2], operands[4]) - && !rtx_equal_p (operands[3], operands[4]) - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + && !rtx_equal_p (operands[3], operands[4])" "@ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> @@ -5472,18 +5923,18 @@ "&& 1" { if (reload_completed - && register_operand (operands[4], <MODE>mode) - && !rtx_equal_p (operands[0], operands[4])) + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])) { emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], operands[4], operands[1])); operands[4] = operands[3] = operands[0]; } else if (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7])) + || !rtx_equal_p (operands[1], operands[6])) { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } else FAIL; @@ -5491,6 +5942,42 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] + UNSPEC_COND_FSUB)] + UNSPEC_COND_FABS) + (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[4]) + && !rtx_equal_p (operands[3], operands[4])" + "@ + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> + #" + "&& reload_completed + && register_operand (operands[4], <MODE>mode) + && !rtx_equal_p (operands[0], operands[4])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], + operands[4], operands[1])); + operands[4] = operands[3] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [FP] Multiplication ;; ------------------------------------------------------------------------- @@ 
-6416,20 +6903,20 @@ ;; Predicated floating-point ternary operations, merging with the ;; first input. -(define_insn_and_rewrite "*cond_<optab><mode>_2" +(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "0, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] SVE_COND_FP_TERNARY) (match_dup 2)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" @@ -6440,22 +6927,42 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_2_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "0, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] + SVE_COND_FP_TERNARY) + (match_dup 2)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> + movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with the ;; third input. -(define_insn_and_rewrite "*cond_<optab><mode>_4" +(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FP_TERNARY) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" @@ -6466,15 +6973,35 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FP_TERNARY) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> + movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated floating-point ternary operations, merging with an ;; independent value. 
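As a hedged illustration of what "merging with an independent value" means at the source level, the C sketch below uses the SVE ACLE intrinsics from <arm_sve.h>: active lanes take the fused multiply-add result and inactive lanes take an unrelated vector, which is the UNSPEC_SEL wrapper the pattern below matches. The function and parameter names are invented for this example and are not part of the patch.

/* Hedged sketch, not part of the patch; requires an SVE-enabled compiler.  */
#include <arm_sve.h>

svfloat32_t
fmla_merge_independent (svbool_t pg, svfloat32_t acc, svfloat32_t x,
                        svfloat32_t y, svfloat32_t fallback)
{
  /* svmla_f32_x leaves inactive lanes unspecified (the relaxed case);
     the surrounding svsel supplies FALLBACK for those lanes.  */
  svfloat32_t mla = svmla_f32_x (pg, acc, x, y);
  return svsel_f32 (pg, mla, fallback);
}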
-(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] @@ -6484,8 +7011,7 @@ "TARGET_SVE && !rtx_equal_p (operands[2], operands[5]) && !rtx_equal_p (operands[3], operands[5]) - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> @@ -6511,6 +7037,41 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] + SVE_COND_FP_TERNARY) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE + && !rtx_equal_p (operands[2], operands[5]) + && !rtx_equal_p (operands[3], operands[5]) + && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> + movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> + #" + "&& reload_completed + && register_operand (operands[5], <MODE>mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using ;; (fma ...) since target-independent code won't understand the indexing. (define_insn "@aarch64_<optab>_lane_<mode>" @@ -6572,20 +7133,20 @@ ) ;; Predicated FCMLA, merging with the third input. 
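The "_4" FCMLA patterns that follow merge with the accumulator, i.e. the third data input. A hedged intrinsics-level sketch of the same operation, with an invented function name; the rotation argument must be a constant of 0, 90, 180 or 270.

/* Hedged sketch, not part of the patch.  */
#include <arm_sve.h>

svfloat32_t
cmla_merge_accumulator (svbool_t pg, svfloat32_t acc,
                        svfloat32_t a, svfloat32_t b)
{
  /* The _m form keeps the accumulator in inactive lanes, which is what
     "merging with the third input" refers to.  */
  return svcmla_f32_m (pg, acc, a, b, 90);
}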
-(define_insn_and_rewrite "*cond_<optab><mode>_4" +(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w") (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] SVE_COND_FCMLA) (match_dup 4)] UNSPEC_SEL))] - "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" + "TARGET_SVE" "@ fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" @@ -6596,23 +7157,41 @@ [(set_attr "movprfx" "*,yes")] ) +(define_insn "*cond_<optab><mode>_4_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] + SVE_COND_FCMLA) + (match_dup 4)] + UNSPEC_SEL))] + "TARGET_SVE" + "@ + fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> + movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" + [(set_attr "movprfx" "*,yes")] +) + ;; Predicated FCMLA, merging with an independent value. -(define_insn_and_rewrite "*cond_<optab><mode>_any" +(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 6) - (match_operand:SI 7 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] SVE_COND_FCMLA) (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] UNSPEC_SEL))] - "TARGET_SVE - && !rtx_equal_p (operands[4], operands[5]) - && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" "@ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> @@ -6636,6 +7215,36 @@ [(set_attr "movprfx" "yes")] ) +(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") + (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] + SVE_COND_FCMLA) + (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] + UNSPEC_SEL))] + "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" + "@ + movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> + movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, 
%3.<Vetype>, #<rot> + movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> + #" + "&& reload_completed + && register_operand (operands[5], <MODE>mode) + && !rtx_equal_p (operands[0], operands[5])" + { + emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], + operands[5], operands[1])); + operands[5] = operands[4] = operands[0]; + } + [(set_attr "movprfx" "yes")] +) + ;; Unpredicated FCMLA with indexing. (define_insn "@aarch64_<optab>_lane_<mode>" [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") @@ -7328,34 +7937,52 @@ "TARGET_SVE" ) -(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>" +(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") (unspec:<VPRED> [(match_operand:<VPRED> 1 "register_operand" "Upl") (match_operand:SI 4 "aarch64_sve_ptrue_flag") (unspec:SVE_FULL_F [(match_operand 5) - (match_operand:SI 6 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w")] UNSPEC_COND_FABS) (unspec:SVE_FULL_F - [(match_operand 7) - (match_operand:SI 8 "aarch64_sve_gp_strictness") + [(match_operand 6) + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 3 "register_operand" "w")] UNSPEC_COND_FABS)] SVE_COND_FP_ABS_CMP))] - "TARGET_SVE - && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) - && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" + "TARGET_SVE" "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" "&& (!rtx_equal_p (operands[1], operands[5]) - || !rtx_equal_p (operands[1], operands[7]))" + || !rtx_equal_p (operands[1], operands[6]))" { operands[5] = copy_rtx (operands[1]); - operands[7] = copy_rtx (operands[1]); + operands[6] = copy_rtx (operands[1]); } ) +(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict" + [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") + (unspec:<VPRED> + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (match_operand:SI 4 "aarch64_sve_ptrue_flag") + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 5 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 2 "register_operand" "w")] + UNSPEC_COND_FABS) + (unspec:SVE_FULL_F + [(match_dup 1) + (match_operand:SI 6 "aarch64_sve_gp_strictness") + (match_operand:SVE_FULL_F 3 "register_operand" "w")] + UNSPEC_COND_FABS)] + SVE_COND_FP_ABS_CMP))] + "TARGET_SVE" + "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [PRED] Select ;; ------------------------------------------------------------------------- @@ -7937,20 +8564,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. 
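The conversion patterns below pair a predicated FCVTZS/FCVTZU with a merge value in operand 3. A hedged ACLE-level sketch of that shape for the float32-to-int32 case; the names are invented for the example.

/* Hedged sketch, not part of the patch.  */
#include <arm_sve.h>

svint32_t
fcvtz_merge (svbool_t pg, svfloat32_t x, svint32_t fallback)
{
  /* Inactive lanes keep FALLBACK, the role played by operand 3 in the
     cond_* conversion patterns.  */
  return svcvt_s32_f32_m (fallback, pg, x);
}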
-(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" +(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed" [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") (unspec:SVE_FULL_HSDI [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_HSDI [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE_COND_FCVTI) (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits> - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" "@ fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> @@ -7962,6 +8587,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict" + [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_HSDI + [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_HSDI + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE_COND_FCVTI) + (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" + "@ + fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> + movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> + movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated narrowing float-to-integer conversion with merging. (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" [(set (match_operand:VNx4SI_ONLY 0 "register_operand") @@ -8101,20 +8745,18 @@ ;; the same register (despite having different modes). Making all the ;; alternatives earlyclobber makes things more consistent for the ;; register allocator. 
-(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" +(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed" [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") (unspec:SVE_FULL_F [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:SVE_FULL_F [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] SVE_COND_ICVTF) (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE - && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits> - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" "@ <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> @@ -8126,6 +8768,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict" + [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") + (unspec:SVE_FULL_F + [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:SVE_FULL_F + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] + SVE_COND_ICVTF) + (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" + "@ + <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> + movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated widening integer-to-float conversion with merging. (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" [(set (match_operand:VNx2DF_ONLY 0 "register_operand") diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index e18b9fe..0cafd0b 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1890,18 +1890,18 @@ ) ;; These instructions do not take MOVPRFX. 
-(define_insn_and_rewrite "*cond_<sve_fp_op><mode>" +(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed" [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") (unspec:SVE_FULL_SDF [(match_operand:<VPRED> 1 "register_operand" "Upl") (unspec:SVE_FULL_SDF [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:<VNARROW> 2 "register_operand" "w")] SVE2_COND_FP_UNARY_LONG) (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] UNSPEC_SEL))] - "TARGET_SVE2 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2" "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>" "&& !rtx_equal_p (operands[1], operands[4])" { @@ -1909,6 +1909,21 @@ } ) +(define_insn "*cond_<sve_fp_op><mode>_strict" + [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") + (unspec:SVE_FULL_SDF + [(match_operand:<VPRED> 1 "register_operand" "Upl") + (unspec:SVE_FULL_SDF + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:<VNARROW> 2 "register_operand" "w")] + SVE2_COND_FP_UNARY_LONG) + (match_operand:SVE_FULL_SDF 3 "register_operand" "0")] + UNSPEC_SEL))] + "TARGET_SVE2" + "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>" +) + ;; ------------------------------------------------------------------------- ;; ---- [FP<-FP] Narrowing conversions ;; ------------------------------------------------------------------------- @@ -1963,20 +1978,18 @@ "TARGET_SVE2" ) -(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any" +(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed" [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") (unspec:VNx4SF_ONLY [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:VNx4SF_ONLY [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:<VWIDE> 2 "register_operand" "w, w, w")] SVE2_COND_FP_UNARY_NARROWB) (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE2 - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> @@ -1988,6 +2001,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond_<sve_fp_op><mode>_any_strict" + [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w") + (unspec:VNx4SF_ONLY + [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:VNx4SF_ONLY + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:<VWIDE> 2 "register_operand" "w, w, w")] + SVE2_COND_FP_UNARY_NARROWB) + (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype> + movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>" + [(set_attr "movprfx" "*,yes,yes")] +) + ;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_* ;; pair because the even elements always have to be supplied for active ;; elements, even if the inactive elements don't matter. 
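Common to the *_relaxed patterns in these hunks is the rewrite step that replaces the old aarch64_sve_pred_dominates_p insn condition: when a relaxed governing predicate has gone stale, it is re-pointed at the SEL predicate in operand 1. A hedged sketch of that step, assuming GCC's internal RTL API (rtx, rtx_equal_p, copy_rtx from rtl.h); the function name and parameters are invented.

/* Hedged sketch, not part of the patch; mirrors the "&& ..." rewrite
   bodies of the *_relaxed define_insn_and_rewrite patterns.  */
static bool
retarget_relaxed_gps (rtx *operands, const int *pred_idx, int n)
{
  bool changed = false;
  for (int i = 0; i < n; i++)
    if (!rtx_equal_p (operands[1], operands[pred_idx[i]]))
      {
        operands[pred_idx[i]] = copy_rtx (operands[1]);
        changed = true;
      }
  /* The real patterns either FAIL or leave the insn alone when nothing
     is stale, so a rewrite only happens when it is actually needed.  */
  return changed;
}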
@@ -2113,14 +2145,12 @@ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") (unspec:<V_INT_EQUIV> [(match_operand 4) - (match_operand:SI 5 "aarch64_sve_gp_strictness") + (const_int SVE_RELAXED_GP) (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] SVE2_COND_INT_UNARY_FP) (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] UNSPEC_SEL))] - "TARGET_SVE2 - && !rtx_equal_p (operands[2], operands[3]) - && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" "@ <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> @@ -2132,6 +2162,25 @@ [(set_attr "movprfx" "*,yes,yes")] ) +(define_insn "*cond_<sve_fp_op><mode>_strict" + [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w") + (unspec:<V_INT_EQUIV> + [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") + (unspec:<V_INT_EQUIV> + [(match_dup 1) + (const_int SVE_STRICT_GP) + (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] + SVE2_COND_INT_UNARY_FP) + (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] + UNSPEC_SEL))] + "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])" + "@ + <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> + movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> + movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" + [(set_attr "movprfx" "*,yes,yes")] +) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Polynomial multiplication ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 0e3239c..e060302 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" + "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b251f39..a8cc545 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1336,6 +1336,58 @@ static const struct tune_params 
neoversen1_tunings = &generic_prefetch_tune }; +static const struct tune_params neoversev1_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_256, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune +}; + +static const struct tune_params neoversen2_tunings = +{ + &cortexa57_extra_costs, + &generic_addrcost_table, + &generic_regmove_cost, + &cortexa57_vector_cost, + &generic_branch_cost, + &generic_approx_modes, + SVE_128, /* sve_width */ + 4, /* memmov_cost */ + 3, /* issue_rate */ + (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */ + "32:16", /* function_align. */ + "4", /* jump_align. */ + "32:16", /* loop_align. */ + 2, /* int_reassoc_width. */ + 4, /* fp_reassoc_width. */ + 2, /* vec_reassoc_width. */ + 2, /* min_div_recip_mul_sf. */ + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ + (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ + &generic_prefetch_tune +}; + static const struct tune_params a64fx_tunings = { &generic_extra_costs, @@ -1935,6 +1987,29 @@ aarch64_sve_abi (void) return sve_abi; } +/* If X is an UNSPEC_SALT_ADDR expression, return the address that it + wraps, otherwise return X itself. */ + +static rtx +strip_salt (rtx x) +{ + rtx search = x; + if (GET_CODE (search) == CONST) + search = XEXP (search, 0); + if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR) + x = XVECEXP (search, 0, 0); + return x; +} + +/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the + expression. */ + +static rtx +strip_offset_and_salt (rtx addr, poly_int64 *offset) +{ + return strip_salt (strip_offset (addr, offset)); +} + /* Generate code to enable conditional branches in functions over 1 MiB. */ const char * aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, @@ -2932,14 +3007,9 @@ static enum tls_model tls_symbolic_operand_type (rtx addr) { enum tls_model tls_kind = TLS_MODEL_NONE; - if (GET_CODE (addr) == CONST) - { - poly_int64 addend; - rtx sym = strip_offset (addr, &addend); - if (GET_CODE (sym) == SYMBOL_REF) - tls_kind = SYMBOL_REF_TLS_MODEL (sym); - } - else if (GET_CODE (addr) == SYMBOL_REF) + poly_int64 offset; + addr = strip_offset_and_salt (addr, &offset); + if (GET_CODE (addr) == SYMBOL_REF) tls_kind = SYMBOL_REF_TLS_MODEL (addr); return tls_kind; @@ -3404,11 +3474,16 @@ aarch64_split_128bit_move (rtx dst, rtx src) } } +/* Return true if we should split a move from 128-bit value SRC + to 128-bit register DEST. */ + bool aarch64_split_128bit_move_p (rtx dst, rtx src) { - return (! REG_P (src) - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); + if (FP_REGNUM_P (REGNO (dst))) + return REG_P (src) && !FP_REGNUM_P (REGNO (src)); + /* All moves to GPRs need to be split. */ + return true; } /* Split a complex SIMD combine. 
*/ @@ -3694,24 +3769,6 @@ aarch64_pfalse_reg (machine_mode mode) return gen_lowpart (mode, reg); } -/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is - true, or alternatively if we know that the operation predicated by - PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a - aarch64_sve_gp_strictness operand that describes the operation - predicated by PRED1[0]. */ - -bool -aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) -{ - machine_mode mode = GET_MODE (pred2); - gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL - && mode == GET_MODE (pred1[0]) - && aarch64_sve_gp_strictness (pred1[1], SImode)); - return (pred1[0] == CONSTM1_RTX (mode) - || INTVAL (pred1[1]) == SVE_RELAXED_GP - || rtx_equal_p (pred1[0], pred2)); -} - /* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag for it. PRED2[0] is the predicate for the instruction whose result is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag @@ -5239,6 +5296,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) as_a <scalar_int_mode> (mode)); } +/* Return the MEM rtx that provides the canary value that should be used + for stack-smashing protection. MODE is the mode of the memory. + For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable + (__stack_chk_guard), otherwise it has no useful value. SALT_TYPE + indicates whether the caller is performing a SET or a TEST operation. */ + +rtx +aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl, + aarch64_salt_type salt_type) +{ + rtx addr; + if (aarch64_stack_protector_guard == SSP_GLOBAL) + { + gcc_assert (MEM_P (decl_rtl)); + addr = XEXP (decl_rtl, 0); + poly_int64 offset; + rtx base = strip_offset_and_salt (addr, &offset); + if (!SYMBOL_REF_P (base)) + return decl_rtl; + + rtvec v = gen_rtvec (2, base, GEN_INT (salt_type)); + addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR); + addr = gen_rtx_CONST (Pmode, addr); + addr = plus_constant (Pmode, addr, offset); + } + else + { + /* Calculate the address from the system register. */ + rtx salt = GEN_INT (salt_type); + addr = gen_reg_rtx (mode); + if (mode == DImode) + emit_insn (gen_reg_stack_protect_address_di (addr, salt)); + else + { + emit_insn (gen_reg_stack_protect_address_si (addr, salt)); + addr = convert_memory_address (Pmode, addr); + } + addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset); + } + return gen_rtx_MEM (mode, force_reg (Pmode, addr)); +} + /* Emit an SVE predicated move from SRC to DEST. PRED is a predicate that is known to contain PTRUE. */ @@ -8677,8 +8776,6 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) static bool aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) { - rtx base, offset; - if (GET_CODE (x) == HIGH) return true; @@ -8688,10 +8785,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) if (GET_CODE (*iter) == CONST_POLY_INT) return true; - split_const (x, &base, &offset); + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) { - if (aarch64_classify_symbol (base, INTVAL (offset)) + /* We checked for POLY_INT_CST offsets above. 
*/ + if (aarch64_classify_symbol (base, offset.to_constant ()) != SYMBOL_FORCE_TO_MEM) return true; else @@ -9217,9 +9316,8 @@ aarch64_classify_address (struct aarch64_address_info *info, && GET_MODE_SIZE (mode).is_constant (&const_size) && const_size >= 4) { - rtx sym, addend; - - split_const (x, &sym, &addend); + poly_int64 offset; + rtx sym = strip_offset_and_salt (x, &offset); return ((GET_CODE (sym) == LABEL_REF || (GET_CODE (sym) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (sym) @@ -9234,10 +9332,12 @@ aarch64_classify_address (struct aarch64_address_info *info, if (allow_reg_index_p && aarch64_base_register_rtx_p (info->base, strict_p)) { - rtx sym, offs; - split_const (info->offset, &sym, &offs); + poly_int64 offset; + HOST_WIDE_INT const_offset; + rtx sym = strip_offset_and_salt (info->offset, &offset); if (GET_CODE (sym) == SYMBOL_REF - && (aarch64_classify_symbol (sym, INTVAL (offs)) + && offset.is_constant (&const_offset) + && (aarch64_classify_symbol (sym, const_offset) == SYMBOL_SMALL_ABSOLUTE)) { /* The symbol and offset must be aligned to the access size. */ @@ -9263,7 +9363,7 @@ aarch64_classify_address (struct aarch64_address_info *info, if (known_eq (ref_size, 0)) ref_size = GET_MODE_SIZE (DImode); - return (multiple_p (INTVAL (offs), ref_size) + return (multiple_p (const_offset, ref_size) && multiple_p (align / BITS_PER_UNIT, ref_size)); } } @@ -9295,9 +9395,8 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p) bool aarch64_symbolic_address_p (rtx x) { - rtx offset; - - split_const (x, &x, &offset); + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF; } @@ -10028,27 +10127,16 @@ aarch64_print_operand (FILE *f, rtx x, int code) switch (code) { case 'c': - switch (GET_CODE (x)) + if (CONST_INT_P (x)) + fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); + else { - case CONST_INT: - fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); - break; - - case SYMBOL_REF: - output_addr_const (f, x); - break; - - case CONST: - if (GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) - { - output_addr_const (f, x); - break; - } - /* Fall through. */ - - default: - output_operand_lossage ("unsupported operand for code '%c'", code); + poly_int64 offset; + rtx base = strip_offset_and_salt (x, &offset); + if (SYMBOL_REF_P (base)) + output_addr_const (f, x); + else + output_operand_lossage ("unsupported operand for code '%c'", code); } break; @@ -10623,6 +10711,19 @@ aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x) output_addr_const (f, x); } +/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ + +static bool +aarch64_output_addr_const_extra (FILE *file, rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR) + { + output_addr_const (file, XVECEXP (x, 0, 0)); + return true; + } + return false; +} + bool aarch64_label_mentioned_p (rtx x) { @@ -15932,6 +16033,7 @@ aarch64_tls_symbol_p (rtx x) if (! 
TARGET_HAVE_TLS) return false; + x = strip_salt (x); if (GET_CODE (x) != SYMBOL_REF) return false; @@ -15987,6 +16089,8 @@ aarch64_classify_tls_symbol (rtx x) enum aarch64_symbol_type aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) { + x = strip_salt (x); + if (GET_CODE (x) == LABEL_REF) { switch (aarch64_cmodel) @@ -16086,11 +16190,10 @@ aarch64_constant_address_p (rtx x) bool aarch64_legitimate_pic_operand_p (rtx x) { - if (GET_CODE (x) == SYMBOL_REF - || (GET_CODE (x) == CONST - && GET_CODE (XEXP (x, 0)) == PLUS - && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) - return false; + poly_int64 offset; + x = strip_offset_and_salt (x, &offset); + if (GET_CODE (x) == SYMBOL_REF) + return false; return true; } @@ -16136,7 +16239,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x) /* If an offset is being added to something else, we need to allow the base to be moved into the destination register, meaning that there are no free temporaries for the offset. */ - x = strip_offset (x, &offset); + x = strip_offset_and_salt (x, &offset); if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0) return false; @@ -18035,6 +18138,7 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) return aarch64_simd_valid_immediate (x, NULL); } + x = strip_salt (x); if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) return true; @@ -23890,6 +23994,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_PRINT_OPERAND_ADDRESS #define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address +#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA +#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra + #undef TARGET_OPTAB_SUPPORTED_P #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dbc6b1d..78fe7c43 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -281,6 +281,7 @@ UNSPEC_GEN_TAG_RND ; Generate a random 4-bit MTE tag. UNSPEC_TAG_SPACE ; Translate address to MTE tag address space. 
UNSPEC_LD1RO + UNSPEC_SALT_ADDR ]) (define_c_enum "unspecv" [ @@ -1360,13 +1361,14 @@ (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") (match_operand:TI 1 - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) || aarch64_reg_or_zero (operands[1], TImode))" "@ # + movi\\t%0.2d, #0 # # mov\\t%0.16b, %1.16b @@ -1375,11 +1377,11 @@ stp\\txzr, xzr, %0 ldr\\t%q0, %1 str\\t%q1, %0" - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ load_16,store_16,store_16,\ load_16,store_16") - (set_attr "length" "8,8,8,4,4,4,4,4,4") - (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")] + (set_attr "length" "8,4,8,8,4,4,4,4,4,4") + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] ) ;; Split a TImode register-register or register-immediate move into @@ -1510,9 +1512,9 @@ (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") (match_operand:TF 1 - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] "TARGET_FLOAT && (register_operand (operands[0], TFmode) || aarch64_reg_or_fp_zero (operands[1], TFmode))" "@ @@ -1535,7 +1537,7 @@ (define_split [(set (match_operand:TF 0 "register_operand" "") - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + (match_operand:TF 1 "nonmemory_operand" ""))] "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" [(const_int 0)] { @@ -6881,43 +6883,37 @@ DONE; }) -;; Named patterns for stack smashing protection. +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_set. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_set" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand")] + (match_operand 1 "")] "" { - machine_mode mode = GET_MODE (operands[0]); - if (aarch64_stack_protector_guard != SSP_GLOBAL) - { - /* Generate access through the system register. */ - rtx tmp_reg = gen_reg_rtx (mode); - if (mode == DImode) - { - emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); - emit_insn (gen_adddi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); - } - else - { - emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); - emit_insn (gen_addsi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); + emit_insn (gen_stack_protect_combined_set (operands[0], operands[1])); + DONE; +}) - } - operands[1] = gen_rtx_MEM (mode, tmp_reg); - } - +(define_expand "stack_protect_combined_set" + [(match_operand 0 "memory_operand") + (match_operand 1 "")] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_SET); emit_insn ((mode == DImode ? gen_stack_protect_set_di : gen_stack_protect_set_si) (operands[0], operands[1])); DONE; }) +;; Operand 1 is either AARCH64_SALT_SSP_SET or AARCH64_SALT_SSP_TEST. 
(define_insn "reg_stack_protect_address_<mode>" [(set (match_operand:PTR 0 "register_operand" "=r") - (unspec:PTR [(const_int 0)] - UNSPEC_SSP_SYSREG))] + (unspec:PTR [(match_operand 1 "const_int_operand")] + UNSPEC_SSP_SYSREG))] "aarch64_stack_protector_guard != SSP_GLOBAL" { char buf[150]; @@ -6940,37 +6936,29 @@ [(set_attr "length" "12") (set_attr "type" "multiple")]) +;; Defined for -mstack-protector-guard=sysreg, which goes through this +;; pattern rather than stack_protect_combined_test. Our implementation +;; of the latter can handle both. (define_expand "stack_protect_test" [(match_operand 0 "memory_operand") - (match_operand 1 "memory_operand") + (match_operand 1 "") (match_operand 2)] "" { - machine_mode mode = GET_MODE (operands[0]); - - if (aarch64_stack_protector_guard != SSP_GLOBAL) - { - /* Generate access through the system register. The - sequence we want here is the access - of the stack offset to come with - mrs scratch_reg, <system_register> - add scratch_reg, scratch_reg, :lo12:offset. */ - rtx tmp_reg = gen_reg_rtx (mode); - if (mode == DImode) - { - emit_insn (gen_reg_stack_protect_address_di (tmp_reg)); - emit_insn (gen_adddi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); - } - else - { - emit_insn (gen_reg_stack_protect_address_si (tmp_reg)); - emit_insn (gen_addsi3 (tmp_reg, tmp_reg, - GEN_INT (aarch64_stack_protector_guard_offset))); + emit_insn (gen_stack_protect_combined_test (operands[0], operands[1], + operands[2])); + DONE; +}) - } - operands[1] = gen_rtx_MEM (mode, tmp_reg); - } +(define_expand "stack_protect_combined_test" + [(match_operand 0 "memory_operand") + (match_operand 1 "") + (match_operand 2)] + "" +{ + machine_mode mode = GET_MODE (operands[0]); + operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1], + AARCH64_SALT_SSP_TEST); emit_insn ((mode == DImode ? gen_stack_protect_test_di : gen_stack_protect_test_si) (operands[0], operands[1])); diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 50f8b23..85c0d62 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -6088,6 +6088,20 @@ vreinterpretq_u32_p128 (poly128_t __a) return (uint32x4_t)__a; } +__extension__ extern __inline float64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_f64_p128 (poly128_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vreinterpretq_p128_f64 (float64x2_t __a) +{ + return (poly128_t) __a; +} + /* vset_lane */ __extension__ extern __inline float16x4_t @@ -12670,6 +12684,13 @@ vceqq_u64 (uint64x2_t __a, uint64x2_t __b) return (__a == __b); } +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqq_p64 (poly64x2_t __a, poly64x2_t __b) +{ + return (__a == __b); +} + /* vceq - scalar. 
*/ __extension__ extern __inline uint32_t @@ -12779,6 +12800,13 @@ vceqz_u64 (uint64x1_t __a) return (__a == __AARCH64_UINT64_C (0)); } +__extension__ extern __inline uint64x1_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqz_p64 (poly64x1_t __a) +{ + return (__a == __AARCH64_UINT64_C (0)); +} + __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vceqzq_f32 (float32x4_t __a) @@ -12856,6 +12884,13 @@ vceqzq_u64 (uint64x2_t __a) return (__a == __AARCH64_UINT64_C (0)); } +__extension__ extern __inline uint64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vceqzq_p64 (poly64x2_t __a) +{ + return (__a == __AARCH64_UINT64_C (0)); +} + /* vceqz - scalar. */ __extension__ extern __inline uint32_t @@ -14054,6 +14089,48 @@ vclsq_s32 (int32x4_t __a) return __builtin_aarch64_clrsbv4si (__a); } +__extension__ extern __inline int8x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u8 (uint8x8_t __a) +{ + return __builtin_aarch64_clrsbv8qi ((int8x8_t) __a); +} + +__extension__ extern __inline int16x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u16 (uint16x4_t __a) +{ + return __builtin_aarch64_clrsbv4hi ((int16x4_t) __a); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vcls_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_clrsbv2si ((int32x2_t) __a); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u8 (uint8x16_t __a) +{ + return __builtin_aarch64_clrsbv16qi ((int8x16_t) __a); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u16 (uint16x8_t __a) +{ + return __builtin_aarch64_clrsbv8hi ((int16x8_t) __a); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vclsq_u32 (uint32x4_t __a) +{ + return __builtin_aarch64_clrsbv4si ((int32x4_t) __a); +} + /* vclz. 
*/ __extension__ extern __inline int8x8_t @@ -15538,7 +15615,7 @@ vdupq_n_f64 (float64_t __a) __extension__ extern __inline poly8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p8 (uint32_t __a) +vdupq_n_p8 (poly8_t __a) { return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; @@ -15546,21 +15623,21 @@ vdupq_n_p8 (uint32_t __a) __extension__ extern __inline poly16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p16 (uint32_t __a) +vdupq_n_p16 (poly16_t __a) { return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } __extension__ extern __inline poly64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_p64 (uint64_t __a) +vdupq_n_p64 (poly64_t __a) { return (poly64x2_t) {__a, __a}; } __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s8 (int32_t __a) +vdupq_n_s8 (int8_t __a) { return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; @@ -15568,7 +15645,7 @@ vdupq_n_s8 (int32_t __a) __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_s16 (int32_t __a) +vdupq_n_s16 (int16_t __a) { return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } @@ -15589,7 +15666,7 @@ vdupq_n_s64 (int64_t __a) __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u8 (uint32_t __a) +vdupq_n_u8 (uint8_t __a) { return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a, __a}; @@ -15597,7 +15674,7 @@ vdupq_n_u8 (uint32_t __a) __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vdupq_n_u16 (uint32_t __a) +vdupq_n_u16 (uint16_t __a) { return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; } @@ -19613,6 +19690,13 @@ vld4q_p64 (const poly64_t * __a) return ret; } +__extension__ extern __inline poly128_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vldrq_p128 (const poly128_t * __ptr) +{ + return *__ptr; +} + /* vldn_dup */ __extension__ extern __inline int8x8x2_t @@ -23962,42 +24046,42 @@ __extension__ extern __inline uint8x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s16 (int16x8_t __a) { - return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); + return __builtin_aarch64_sqmovunv8hi_us (__a); } __extension__ extern __inline uint16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s32 (int32x4_t __a) { - return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); + return __builtin_aarch64_sqmovunv4si_us (__a); } __extension__ extern __inline uint32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovun_s64 (int64x2_t __a) { - return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); + return __builtin_aarch64_sqmovunv2di_us (__a); } -__extension__ extern __inline int8_t +__extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovunh_s16 (int16_t __a) { - return (int8_t) __builtin_aarch64_sqmovunhi (__a); + return __builtin_aarch64_sqmovunhi_us (__a); } -__extension__ extern __inline int16_t +__extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovuns_s32 (int32_t __a) { - return (int16_t) __builtin_aarch64_sqmovunsi 
(__a); + return __builtin_aarch64_sqmovunsi_us (__a); } -__extension__ extern __inline int32_t +__extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vqmovund_s64 (int64_t __a) { - return (int32_t) __builtin_aarch64_sqmovundi (__a); + return __builtin_aarch64_sqmovundi_us (__a); } /* vqneg */ @@ -24253,28 +24337,28 @@ vqrshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlb_u8 (uint8_t __a, uint8_t __b) +vqrshlb_u8 (uint8_t __a, int8_t __b) { return __builtin_aarch64_uqrshlqi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshlh_u16 (uint16_t __a, uint16_t __b) +vqrshlh_u16 (uint16_t __a, int16_t __b) { return __builtin_aarch64_uqrshlhi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshls_u32 (uint32_t __a, uint32_t __b) +vqrshls_u32 (uint32_t __a, int32_t __b) { return __builtin_aarch64_uqrshlsi_uus (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqrshld_u64 (uint64_t __a, uint64_t __b) +vqrshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_uqrshldi_uus (__a, __b); } @@ -24553,28 +24637,28 @@ vqshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlb_u8 (uint8_t __a, uint8_t __b) +vqshlb_u8 (uint8_t __a, int8_t __b) { return __builtin_aarch64_uqshlqi_uus (__a, __b); } __extension__ extern __inline uint16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshlh_u16 (uint16_t __a, uint16_t __b) +vqshlh_u16 (uint16_t __a, int16_t __b) { return __builtin_aarch64_uqshlhi_uus (__a, __b); } __extension__ extern __inline uint32_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshls_u32 (uint32_t __a, uint32_t __b) +vqshls_u32 (uint32_t __a, int32_t __b) { return __builtin_aarch64_uqshlsi_uus (__a, __b); } __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vqshld_u64 (uint64_t __a, uint64_t __b) +vqshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_uqshldi_uus (__a, __b); } @@ -26003,6 +26087,13 @@ vrndmq_f64 (float64x2_t __a) /* vrndn */ +__extension__ extern __inline float32_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vrndns_f32 (float32_t __a) +{ + return __builtin_aarch64_frintnsf (__a); +} + __extension__ extern __inline float32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vrndn_f32 (float32x2_t __a) @@ -26908,7 +26999,7 @@ vshld_s64 (int64_t __a, int64_t __b) __extension__ extern __inline uint64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -vshld_u64 (uint64_t __a, uint64_t __b) +vshld_u64 (uint64_t __a, int64_t __b) { return __builtin_aarch64_ushldi_uus (__a, __b); } @@ -30104,6 +30195,13 @@ vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); } +__extension__ extern __inline void +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vstrq_p128 (poly128_t * __ptr, poly128_t __val) +{ + *__ptr = __val; +} + /* vsub */ __extension__ extern __inline int64_t @@ -30491,6 +30589,17 @@ vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) #endif } 
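The arm_neon.h hunks around here add poly64x2_t variants of the permute intrinsics (vtrn1q_p64/vtrn2q_p64 and the uzp/zip counterparts). A hedged usage sketch with invented names:

/* Hedged usage sketch, not part of the patch.  */
#include <arm_neon.h>

void
trn_p64 (poly64x2_t a, poly64x2_t b, poly64x2_t *lo, poly64x2_t *hi)
{
  /* For two-element vectors, TRN1 pairs element 0 of each input and
     TRN2 pairs element 1, as the __builtin_shuffle masks encode.  */
  *lo = vtrn1q_p64 (a, b);
  *hi = vtrn2q_p64 (a, b);
}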
+__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtrn1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline uint64x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) @@ -30761,6 +30870,18 @@ vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } + +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vtrn2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __extension__ extern __inline float16x4x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vtrn_f16 (float16x4_t __a, float16x4_t __b) @@ -31407,6 +31528,17 @@ vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vuzp1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vuzp2_f16 (float16x4_t __a, float16x4_t __b) @@ -31666,6 +31798,17 @@ vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vuzp2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (uzp) /* vzip */ @@ -31934,6 +32077,17 @@ vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vzip1q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {0, 2}); +#endif +} + __extension__ extern __inline float16x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vzip2_f16 (float16x4_t __a, float16x4_t __b) @@ -32198,6 +32352,17 @@ vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) #endif } +__extension__ extern __inline poly64x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vzip2q_p64 (poly64x2_t __a, poly64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (poly64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (poly64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (zip) #undef __INTERLEAVE_LIST @@ -35659,6 +35824,55 @@ vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) #pragma GCC pop_options +__extension__ extern __inline poly8x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly16x4_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p16 (poly16x4_t __a, poly16x4_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly64x1_t +__attribute 
((__always_inline__, __gnu_inline__, __artificial__)) +vadd_p64 (poly64x1_t __a, poly64x1_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly8x16_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p8 (poly8x16_t __a, poly8x16_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly16x8_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p16 (poly16x8_t __a, poly16x8_t __b) +{ + return __a ^__b; +} + +__extension__ extern __inline poly64x2_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p64 (poly64x2_t __a, poly64x2_t __b) +{ + return __a ^ __b; +} + +__extension__ extern __inline poly128_t +__attribute ((__always_inline__, __gnu_inline__, __artificial__)) +vaddq_p128 (poly128_t __a, poly128_t __b) +{ + return __a ^ __b; +} + #undef __aarch64_vget_lane_any #undef __aarch64_vdup_lane_any diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c index 33e8015..db505a4 100644 --- a/gcc/config/arm/arm-builtins.c +++ b/gcc/config/arm/arm-builtins.c @@ -811,23 +811,23 @@ arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none}; + = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none}; #define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers) static enum arm_type_qualifiers arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned}; + = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned}; #define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers) static enum arm_type_qualifiers arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_unsigned, qualifier_const, + = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none, qualifier_unsigned}; #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) static enum arm_type_qualifiers arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_unsigned, qualifier_const, + = { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned, qualifier_unsigned}; #define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index c98f8ed..8c61ad0 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -135,10 +135,6 @@ define feature armv8_1m_main # Floating point and Neon extensions. # VFPv1 is not supported in GCC. -# This feature bit is enabled for all VFP, MVE and -# MVE with floating point extensions. -define feature vfp_base - # Vector floating point v2. define feature vfpv2 @@ -251,7 +247,7 @@ define fgroup ALL_SIMD ALL_SIMD_INTERNAL ALL_SIMD_EXTERNAL # List of all FPU bits to strip out if -mfpu is used to override the # default. fp16 is deliberately missing from this list. -define fgroup ALL_FPU_INTERNAL vfp_base vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL +define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 fpv5 fp16conv fp_dbl ALL_SIMD_INTERNAL # Similarly, but including fp16 and other extensions that aren't part of # -mfpu support. define fgroup ALL_FPU_EXTERNAL fp16 bf16 @@ -296,11 +292,11 @@ define fgroup ARMv8r ARMv8a define fgroup ARMv8_1m_main ARMv8m_main armv8_1m_main # Useful combinations. 
-define fgroup VFPv2 vfp_base vfpv2 +define fgroup VFPv2 vfpv2 define fgroup VFPv3 VFPv2 vfpv3 define fgroup VFPv4 VFPv3 vfpv4 fp16conv define fgroup FPv5 VFPv4 fpv5 -define fgroup MVE mve vfp_base armv7em +define fgroup MVE mve armv7em define fgroup MVE_FP MVE FPv5 fp16 mve_float define fgroup FP_DBL fp_dbl @@ -310,6 +306,18 @@ define fgroup NEON FP_D32 neon define fgroup CRYPTO NEON crypto define fgroup DOTPROD NEON dotprod +# Implied feature bits. These are for non-named features shared between fgroups. +# Shared feature f belonging to fgroups A and B will be erroneously removed if: +# A and B are enabled by default AND A is disabled by a removal flag. +# To ensure that f is retained, we must add such bits to the ISA after +# processing the removal flags. This is implemented by 'implied bits': +# define implied <name> [<feature-or-fgroup>]+ +# This indicates that, if any of the listed features are enabled, or if any +# member of a listed fgroup is enabled, then <name> will be implicitly enabled. + +# Enabled for all VFP, MVE and MVE with floating point extensions. +define implied vfp_base MVE MVE_FP ALL_FP + # List of all quirk bits to strip out when comparing CPU features with # architectures. # xscale isn't really a 'quirk', but it isn't an architecture either and we @@ -1447,6 +1455,39 @@ begin cpu cortex-a77 part d0d end cpu cortex-a77 +begin cpu cortex-a78 + cname cortexa78 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d41 +end cpu cortex-a78 + +begin cpu cortex-a78ae + cname cortexa78ae + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d42 +end cpu cortex-a78ae + +begin cpu cortex-x1 + cname cortexx1 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.2-a+fp16+dotprod + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part d44 +end cpu cortex-x1 + begin cpu neoverse-n1 cname neoversen1 alias !ares @@ -1478,6 +1519,30 @@ begin cpu cortex-a76.cortex-a55 costs cortex_a57 end cpu cortex-a76.cortex-a55 +# Armv8.4 A-profile Architecture Processors +begin cpu neoverse-v1 + cname neoversev1 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.4-a+fp16+bf16+i8mm + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part 0xd40 +end cpu neoverse-v1 + +# Armv8.5 A-profile Architecture Processors +begin cpu neoverse-n2 + cname neoversen2 + tune for cortex-a57 + tune flags LDSCHED + architecture armv8.5-a+fp16+bf16+i8mm + option crypto add FP_ARMv8 CRYPTO + costs cortex_a57 + vendor 41 + part 0xd49 +end cpu neoverse-n2 + # V8 M-profile implementations. 
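The "define implied" rule above replaces the old vfp_base feature bit that previously lived inside the VFPv2 and MVE fgroups: removal options are processed first, and any shared bit whose antecedent feature is still enabled is then switched back on (see the ante/cons loop added to arm_configure_build_target further down in this patch). A simplified, self-contained model of that post-processing step; the bit numbers and the implications table below are illustrative stand-ins, not the generated all_implied_fbits data:

#include <stdint.h>
#include <stdio.h>

enum { FBIT_MVE, FBIT_MVE_FLOAT, FBIT_VFPV2, FBIT_VFP_BASE };

struct implication { int ante, cons; };

/* "define implied vfp_base MVE MVE_FP ALL_FP" expands to one entry per
   antecedent feature: if the antecedent survived the removal flags, the
   shared bit is re-enabled.  */
static const struct implication implications[] = {
  { FBIT_MVE,       FBIT_VFP_BASE },
  { FBIT_MVE_FLOAT, FBIT_VFP_BASE },
  { FBIT_VFPV2,     FBIT_VFP_BASE },
  { -1, -1 }
};

static uint64_t apply_implied_bits (uint64_t isa)
{
  for (const struct implication *i = implications; i->ante >= 0; i++)
    if (isa & (UINT64_C (1) << i->ante))
      isa |= UINT64_C (1) << i->cons;
  return isa;
}

int main (void)
{
  /* Illustrative scenario: a removal option stripped the FP features,
     but integer MVE remains, so vfp_base must come back.  */
  uint64_t isa = UINT64_C (1) << FBIT_MVE;
  isa = apply_implied_bits (isa);
  printf ("vfp_base %s\n",
          (isa & (UINT64_C (1) << FBIT_VFP_BASE)) ? "enabled" : "disabled");
  return 0;
}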
begin cpu cortex-m23 cname cortexm23 @@ -1508,6 +1573,10 @@ begin cpu cortex-m55 cname cortexm55 tune flags LDSCHED architecture armv8.1-m.main+mve.fp+fp.dp + option nomve.fp remove mve_float + option nomve remove mve mve_float + option nofp remove ALL_FP mve_float + option nodsp remove MVE mve_float isa quirk_no_asmcpu costs v7m vendor 41 diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 0cc0ae7..703d616 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -120,7 +120,6 @@ extern int arm_coproc_mem_operand_no_writeback (rtx); extern int arm_coproc_mem_operand_wb (rtx, int); extern int neon_vector_mem_operand (rtx, int, bool); extern int mve_vector_mem_operand (machine_mode, rtx, bool); -bool arm_mve_mode_and_operands_type_check (machine_mode, rtx, rtx); extern int neon_struct_mem_operand (rtx); extern rtx *neon_vcmla_lane_prepare_operands (rtx *); @@ -373,9 +372,11 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, extern bool arm_fusion_enabled_p (tune_params::fuse_ops); extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); #endif /* RTX_CODE */ extern bool arm_gen_setmem (rtx *); +extern void arm_expand_vcond (rtx *, machine_mode); extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool arm_autoinc_modes_ok_p (machine_mode, enum arm_auto_incmodes); diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index ce35661..05f5c08 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -241,6 +241,15 @@ EnumValue Enum(processor_type) String(cortex-a77) Value( TARGET_CPU_cortexa77) EnumValue +Enum(processor_type) String(cortex-a78) Value( TARGET_CPU_cortexa78) + +EnumValue +Enum(processor_type) String(cortex-a78ae) Value( TARGET_CPU_cortexa78ae) + +EnumValue +Enum(processor_type) String(cortex-x1) Value( TARGET_CPU_cortexx1) + +EnumValue Enum(processor_type) String(neoverse-n1) Value( TARGET_CPU_neoversen1) EnumValue @@ -250,6 +259,12 @@ EnumValue Enum(processor_type) String(cortex-a76.cortex-a55) Value( TARGET_CPU_cortexa76cortexa55) EnumValue +Enum(processor_type) String(neoverse-v1) Value( TARGET_CPU_neoversev1) + +EnumValue +Enum(processor_type) String(neoverse-n2) Value( TARGET_CPU_neoversen2) + +EnumValue Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 8ea9435..32657da 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -45,7 +45,9 @@ cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35, cortexa73cortexa53,cortexa55,cortexa75, cortexa76,cortexa76ae,cortexa77, + cortexa78,cortexa78ae,cortexx1, neoversen1,cortexa75cortexa55,cortexa76cortexa55, - cortexm23,cortexm33,cortexm35p, - cortexm55,cortexr52" + neoversev1,neoversen2,cortexm23, + cortexm33,cortexm35p,cortexm55, + cortexr52" (const (symbol_ref "((enum attr_tune) arm_tune)"))) diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 022ef6c..dfadaca 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -3391,6 +3391,20 @@ arm_configure_build_target (struct arm_build_target *target, bitmap_ior (target->isa, target->isa, fpu_bits); } + /* There may be implied bits which we still need to enable. 
These are + non-named features which are needed to complete other sets of features, + but cannot be enabled from arm-cpus.in due to being shared between + multiple fgroups. Each entry in all_implied_fbits is of the form + ante -> cons, meaning that if the feature "ante" is enabled, we should + implicitly enable "cons". */ + const struct fbit_implication *impl = all_implied_fbits; + while (impl->ante) + { + if (bitmap_bit_p (target->isa, impl->ante)) + bitmap_set_bit (target->isa, impl->cons); + impl++; + } + if (!arm_selected_tune) arm_selected_tune = arm_selected_cpu; else /* Validate the features passed to -mtune. */ @@ -3415,8 +3429,9 @@ arm_option_override (void) { static const enum isa_feature fpu_bitlist_internal[] = { ISA_ALL_FPU_INTERNAL, isa_nobit }; + /* isa_bit_mve_float is also part of FP bit list for arch v8.1-m.main. */ static const enum isa_feature fp_bitlist[] - = { ISA_ALL_FP, isa_nobit }; + = { ISA_ALL_FP, isa_bit_mve_float, isa_nobit }; static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit}; cl_target_option opts; @@ -13277,14 +13292,18 @@ arm_coproc_mem_operand_wb (rtx op, int wb_level) /* Match: (plus (reg) - (const)). */ + (const)) + + The encoded immediate for 16-bit modes is multiplied by 2, + while the encoded immediate for 32-bit and 64-bit modes is + multiplied by 4. */ + int factor = MIN (GET_MODE_SIZE (GET_MODE (op)), 4); if (GET_CODE (ind) == PLUS && REG_P (XEXP (ind, 0)) && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) && CONST_INT_P (XEXP (ind, 1)) - && INTVAL (XEXP (ind, 1)) > -1024 - && INTVAL (XEXP (ind, 1)) < 1024 - && (INTVAL (XEXP (ind, 1)) & 3) == 0) + && IN_RANGE (INTVAL (XEXP (ind, 1)), -255 * factor, 255 * factor) + && (INTVAL (XEXP (ind, 1)) & (factor - 1)) == 0) return TRUE; return FALSE; @@ -28946,6 +28965,30 @@ arm_preferred_simd_mode (scalar_mode mode) default:; } + if (TARGET_HAVE_MVE) + switch (mode) + { + case E_QImode: + return V16QImode; + case E_HImode: + return V8HImode; + case E_SImode: + return V4SImode; + + default:; + } + + if (TARGET_HAVE_MVE_FLOAT) + switch (mode) + { + case E_HFmode: + return V8HFmode; + case E_SFmode: + return V4SFmode; + + default:; + } + return word_mode; } @@ -30630,6 +30673,127 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, arm_post_atomic_barrier (model); } +/* Expand code to compare vectors OP0 and OP1 using condition CODE. + If CAN_INVERT, store either the result or its inverse in TARGET + and return true if TARGET contains the inverse. If !CAN_INVERT, + always store the result in TARGET, never its inverse. + + Note that the handling of floating-point comparisons is not + IEEE compliant. */ + +bool +arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, + bool can_invert) +{ + machine_mode cmp_result_mode = GET_MODE (target); + machine_mode cmp_mode = GET_MODE (op0); + + bool inverted; + switch (code) + { + /* For these we need to compute the inverse of the requested + comparison. */ + case UNORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case NE: + code = reverse_condition_maybe_unordered (code); + if (!can_invert) + { + /* Recursively emit the inverted comparison into a temporary + and then store its inverse in TARGET. This avoids reusing + TARGET (which for integer NE could be one of the inputs). 
*/ + rtx tmp = gen_reg_rtx (cmp_result_mode); + if (arm_expand_vector_compare (tmp, code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); + return false; + } + inverted = true; + break; + + default: + inverted = false; + break; + } + + switch (code) + { + /* These are natively supported for zero comparisons, but otherwise + require the operands to be swapped. */ + case LE: + case LT: + if (op1 != CONST0_RTX (cmp_mode)) + { + code = swap_condition (code); + std::swap (op0, op1); + } + /* Fall through. */ + + /* These are natively supported for both register and zero operands. */ + case EQ: + case GE: + case GT: + emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); + return inverted; + + /* These are natively supported for register operands only. + Comparisons with zero aren't useful and should be folded + or canonicalized by target-independent code. */ + case GEU: + case GTU: + emit_insn (gen_neon_vc (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); + return inverted; + + /* These require the operands to be swapped and likewise do not + support comparisons with zero. */ + case LEU: + case LTU: + emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, + target, force_reg (cmp_mode, op1), op0)); + return inverted; + + /* These need a combination of two comparisons. */ + case LTGT: + case ORDERED: + { + /* Operands are LTGT iff (a > b || a > b). + Operands are ORDERED iff (a > b || a <= b). */ + rtx gt_res = gen_reg_rtx (cmp_result_mode); + rtx alt_res = gen_reg_rtx (cmp_result_mode); + rtx_code alt_code = (code == LTGT ? LT : LE); + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, + gt_res, alt_res))); + return inverted; + } + + default: + gcc_unreachable (); + } +} + +/* Expand a vcond or vcondu pattern with operands OPERANDS. + CMP_RESULT_MODE is the mode of the comparison result. 
*/ + +void +arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) +{ + rtx mask = gen_reg_rtx (cmp_result_mode); + bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), + operands[4], operands[5], true); + if (inverted) + std::swap (operands[1], operands[2]); + emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0], + mask, operands[1], operands[2])); +} + #define MAX_VECT_LEN 16 struct expand_vec_perm_d @@ -33112,9 +33276,7 @@ arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode, = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode)); rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, - libval_mode, - op0, GET_MODE (op0), - op1, GET_MODE (op1)); + libval_mode, op0, mode, op1, mode); rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0); rtx remainder = simplify_gen_subreg (mode, libval, libval_mode, @@ -33578,17 +33740,4 @@ arm_mode_base_reg_class (machine_mode mode) struct gcc_target targetm = TARGET_INITIALIZER; -bool -arm_mve_mode_and_operands_type_check (machine_mode mode, rtx op0, rtx op1) -{ - if (!(TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT)) - return true; - else if (mode == E_BFmode) - return false; - else if ((s_register_operand (op0, mode) && MEM_P (op1)) - || (s_register_operand (op1, mode) && MEM_P (op0))) - return false; - return true; -} - #include "gt-arm.h" diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f4d3676..4a63d33 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1110,6 +1110,47 @@ extern const int arm_arch_cde_coproc_bits[]; #define VALID_MVE_STRUCT_MODE(MODE) \ ((MODE) == TImode || (MODE) == OImode || (MODE) == XImode) +/* The conditions under which vector modes are supported for general + arithmetic using Neon. */ + +#define ARM_HAVE_NEON_V8QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2SI_ARITH TARGET_NEON + +#define ARM_HAVE_NEON_V16QI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V8HI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V4SI_ARITH TARGET_NEON +#define ARM_HAVE_NEON_V2DI_ARITH TARGET_NEON + +/* HF operations have their own flush-to-zero control (FPSCR.FZ16). */ +#define ARM_HAVE_NEON_V4HF_ARITH TARGET_NEON_FP16INST +#define ARM_HAVE_NEON_V8HF_ARITH TARGET_NEON_FP16INST + +/* SF operations always flush to zero, regardless of FPSCR.FZ, so we can + only use them for general arithmetic when -funsafe-math-optimizations + is in effect. */ +#define ARM_HAVE_NEON_V2SF_ARITH \ + (TARGET_NEON && flag_unsafe_math_optimizations) +#define ARM_HAVE_NEON_V4SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +/* The conditions under which vector modes are supported for general + arithmetic by any vector extension. 
*/ + +#define ARM_HAVE_V8QI_ARITH (ARM_HAVE_NEON_V8QI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V4HI_ARITH (ARM_HAVE_NEON_V4HI_ARITH || TARGET_REALLY_IWMMXT) +#define ARM_HAVE_V2SI_ARITH (ARM_HAVE_NEON_V2SI_ARITH || TARGET_REALLY_IWMMXT) + +#define ARM_HAVE_V16QI_ARITH (ARM_HAVE_NEON_V16QI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V8HI_ARITH (ARM_HAVE_NEON_V8HI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V4SI_ARITH (ARM_HAVE_NEON_V4SI_ARITH || TARGET_HAVE_MVE) +#define ARM_HAVE_V2DI_ARITH ARM_HAVE_NEON_V2DI_ARITH + +#define ARM_HAVE_V4HF_ARITH ARM_HAVE_NEON_V4HF_ARITH +#define ARM_HAVE_V2SF_ARITH ARM_HAVE_NEON_V2SF_ARITH + +#define ARM_HAVE_V8HF_ARITH (ARM_HAVE_NEON_V8HF_ARITH || TARGET_HAVE_MVE_FLOAT) +#define ARM_HAVE_V4SF_ARITH (ARM_HAVE_NEON_V4SF_ARITH || TARGET_HAVE_MVE_FLOAT) + /* The register numbers in sequence, for passing to arm_gen_load_multiple. */ extern int arm_regs_in_sequence[]; diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index bffdb0b..1a8e498 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -7289,7 +7289,9 @@ (define_insn "*arm32_mov<mode>" [(set (match_operand:HFBF 0 "nonimmediate_operand" "=r,m,r,r") (match_operand:HFBF 1 "general_operand" " m,r,r,F"))] - "TARGET_32BIT && !TARGET_HARD_FLOAT + "TARGET_32BIT + && !TARGET_HARD_FLOAT + && !TARGET_HAVE_MVE && ( s_register_operand (operands[0], <MODE>mode) || s_register_operand (operands[1], <MODE>mode))" "* @@ -7355,7 +7357,7 @@ if (arm_disable_literal_pool && (REG_P (operands[0]) || SUBREG_P (operands[0])) && CONST_DOUBLE_P (operands[1]) - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !vfp3_const_double_rtx (operands[1])) { rtx clobreg = gen_reg_rtx (SFmode); @@ -7452,7 +7454,7 @@ if (arm_disable_literal_pool && (REG_P (operands[0]) || SUBREG_P (operands[0])) && CONSTANT_P (operands[1]) - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !arm_const_double_rtx (operands[1]) && !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))) { @@ -9212,7 +9214,7 @@ operands[2] = operands[1]; else { - rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0); + rtx mem = force_const_mem (SImode, operands[1]); emit_move_insn (operands[2], mem); } } @@ -9295,7 +9297,7 @@ operands[3] = operands[1]; else { - rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0); + rtx mem = force_const_mem (SImode, operands[1]); emit_move_insn (operands[3], mem); } } diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h index a801705..6c0d1e2 100644 --- a/gcc/config/arm/arm_mve.h +++ b/gcc/config/arm/arm_mve.h @@ -141,6 +141,7 @@ #define vrev64q_m(__inactive, __a, __p) __arm_vrev64q_m(__inactive, __a, __p) #define vqrdmlashq(__a, __b, __c) __arm_vqrdmlashq(__a, __b, __c) #define vqrdmlahq(__a, __b, __c) __arm_vqrdmlahq(__a, __b, __c) +#define vqdmlashq(__a, __b, __c) __arm_vqdmlashq(__a, __b, __c) #define vqdmlahq(__a, __b, __c) __arm_vqdmlahq(__a, __b, __c) #define vmvnq_m(__inactive, __a, __p) __arm_vmvnq_m(__inactive, __a, __p) #define vmlasq(__a, __b, __c) __arm_vmlasq(__a, __b, __c) @@ -260,6 +261,7 @@ #define vorrq_m(__inactive, __a, __b, __p) __arm_vorrq_m(__inactive, __a, __b, __p) #define vqaddq_m(__inactive, __a, __b, __p) __arm_vqaddq_m(__inactive, __a, __b, __p) #define vqdmladhq_m(__inactive, __a, __b, __p) __arm_vqdmladhq_m(__inactive, __a, __b, __p) +#define vqdmlashq_m(__a, __b, __c, __p) __arm_vqdmlashq_m(__a, __b, __c, __p) #define vqdmladhxq_m(__inactive, __a, __b, __p) __arm_vqdmladhxq_m(__inactive, __a, __b, __p) #define vqdmlahq_m(__a, __b, __c, __p) __arm_vqdmlahq_m(__a, __b, 
__c, __p) #define vqdmlsdhq_m(__inactive, __a, __b, __p) __arm_vqdmlsdhq_m(__inactive, __a, __b, __p) @@ -643,6 +645,7 @@ #define vcvtpq_u16_f16(__a) __arm_vcvtpq_u16_f16(__a) #define vcvtpq_u32_f32(__a) __arm_vcvtpq_u32_f32(__a) #define vcvtnq_u16_f16(__a) __arm_vcvtnq_u16_f16(__a) +#define vcvtnq_u32_f32(__a) __arm_vcvtnq_u32_f32(__a) #define vcvtmq_u16_f16(__a) __arm_vcvtmq_u16_f16(__a) #define vcvtmq_u32_f32(__a) __arm_vcvtmq_u32_f32(__a) #define vcvtaq_u16_f16(__a) __arm_vcvtaq_u16_f16(__a) @@ -1234,9 +1237,6 @@ #define vpselq_u8(__a, __b, __p) __arm_vpselq_u8(__a, __b, __p) #define vpselq_s8(__a, __b, __p) __arm_vpselq_s8(__a, __b, __p) #define vrev64q_m_u8(__inactive, __a, __p) __arm_vrev64q_m_u8(__inactive, __a, __p) -#define vqrdmlashq_n_u8(__a, __b, __c) __arm_vqrdmlashq_n_u8(__a, __b, __c) -#define vqrdmlahq_n_u8(__a, __b, __c) __arm_vqrdmlahq_n_u8(__a, __b, __c) -#define vqdmlahq_n_u8(__a, __b, __c) __arm_vqdmlahq_n_u8(__a, __b, __c) #define vmvnq_m_u8(__inactive, __a, __p) __arm_vmvnq_m_u8(__inactive, __a, __p) #define vmlasq_n_u8(__a, __b, __c) __arm_vmlasq_n_u8(__a, __b, __c) #define vmlaq_n_u8(__a, __b, __c) __arm_vmlaq_n_u8(__a, __b, __c) @@ -1306,6 +1306,7 @@ #define vqdmlsdhxq_s8(__inactive, __a, __b) __arm_vqdmlsdhxq_s8(__inactive, __a, __b) #define vqdmlsdhq_s8(__inactive, __a, __b) __arm_vqdmlsdhq_s8(__inactive, __a, __b) #define vqdmlahq_n_s8(__a, __b, __c) __arm_vqdmlahq_n_s8(__a, __b, __c) +#define vqdmlashq_n_s8(__a, __b, __c) __arm_vqdmlashq_n_s8(__a, __b, __c) #define vqdmladhxq_s8(__inactive, __a, __b) __arm_vqdmladhxq_s8(__inactive, __a, __b) #define vqdmladhq_s8(__inactive, __a, __b) __arm_vqdmladhq_s8(__inactive, __a, __b) #define vmlsdavaxq_s8(__a, __b, __c) __arm_vmlsdavaxq_s8(__a, __b, __c) @@ -1319,9 +1320,6 @@ #define vpselq_u16(__a, __b, __p) __arm_vpselq_u16(__a, __b, __p) #define vpselq_s16(__a, __b, __p) __arm_vpselq_s16(__a, __b, __p) #define vrev64q_m_u16(__inactive, __a, __p) __arm_vrev64q_m_u16(__inactive, __a, __p) -#define vqrdmlashq_n_u16(__a, __b, __c) __arm_vqrdmlashq_n_u16(__a, __b, __c) -#define vqrdmlahq_n_u16(__a, __b, __c) __arm_vqrdmlahq_n_u16(__a, __b, __c) -#define vqdmlahq_n_u16(__a, __b, __c) __arm_vqdmlahq_n_u16(__a, __b, __c) #define vmvnq_m_u16(__inactive, __a, __p) __arm_vmvnq_m_u16(__inactive, __a, __p) #define vmlasq_n_u16(__a, __b, __c) __arm_vmlasq_n_u16(__a, __b, __c) #define vmlaq_n_u16(__a, __b, __c) __arm_vmlaq_n_u16(__a, __b, __c) @@ -1390,6 +1388,7 @@ #define vqrdmladhq_s16(__inactive, __a, __b) __arm_vqrdmladhq_s16(__inactive, __a, __b) #define vqdmlsdhxq_s16(__inactive, __a, __b) __arm_vqdmlsdhxq_s16(__inactive, __a, __b) #define vqdmlsdhq_s16(__inactive, __a, __b) __arm_vqdmlsdhq_s16(__inactive, __a, __b) +#define vqdmlashq_n_s16(__a, __b, __c) __arm_vqdmlashq_n_s16(__a, __b, __c) #define vqdmlahq_n_s16(__a, __b, __c) __arm_vqdmlahq_n_s16(__a, __b, __c) #define vqdmladhxq_s16(__inactive, __a, __b) __arm_vqdmladhxq_s16(__inactive, __a, __b) #define vqdmladhq_s16(__inactive, __a, __b) __arm_vqdmladhq_s16(__inactive, __a, __b) @@ -1404,9 +1403,6 @@ #define vpselq_u32(__a, __b, __p) __arm_vpselq_u32(__a, __b, __p) #define vpselq_s32(__a, __b, __p) __arm_vpselq_s32(__a, __b, __p) #define vrev64q_m_u32(__inactive, __a, __p) __arm_vrev64q_m_u32(__inactive, __a, __p) -#define vqrdmlashq_n_u32(__a, __b, __c) __arm_vqrdmlashq_n_u32(__a, __b, __c) -#define vqrdmlahq_n_u32(__a, __b, __c) __arm_vqrdmlahq_n_u32(__a, __b, __c) -#define vqdmlahq_n_u32(__a, __b, __c) __arm_vqdmlahq_n_u32(__a, __b, __c) #define 
vmvnq_m_u32(__inactive, __a, __p) __arm_vmvnq_m_u32(__inactive, __a, __p) #define vmlasq_n_u32(__a, __b, __c) __arm_vmlasq_n_u32(__a, __b, __c) #define vmlaq_n_u32(__a, __b, __c) __arm_vmlaq_n_u32(__a, __b, __c) @@ -1475,6 +1471,7 @@ #define vqrdmladhq_s32(__inactive, __a, __b) __arm_vqrdmladhq_s32(__inactive, __a, __b) #define vqdmlsdhxq_s32(__inactive, __a, __b) __arm_vqdmlsdhxq_s32(__inactive, __a, __b) #define vqdmlsdhq_s32(__inactive, __a, __b) __arm_vqdmlsdhq_s32(__inactive, __a, __b) +#define vqdmlashq_n_s32(__a, __b, __c) __arm_vqdmlashq_n_s32(__a, __b, __c) #define vqdmlahq_n_s32(__a, __b, __c) __arm_vqdmlahq_n_s32(__a, __b, __c) #define vqdmladhxq_s32(__inactive, __a, __b) __arm_vqdmladhxq_s32(__inactive, __a, __b) #define vqdmladhq_s32(__inactive, __a, __b) __arm_vqdmladhq_s32(__inactive, __a, __b) @@ -1901,6 +1898,9 @@ #define vqdmladhxq_m_s8(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s8(__inactive, __a, __b, __p) #define vqdmladhxq_m_s32(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s32(__inactive, __a, __b, __p) #define vqdmladhxq_m_s16(__inactive, __a, __b, __p) __arm_vqdmladhxq_m_s16(__inactive, __a, __b, __p) +#define vqdmlashq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s8(__a, __b, __c, __p) +#define vqdmlashq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s32(__a, __b, __c, __p) +#define vqdmlashq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlashq_m_n_s16(__a, __b, __c, __p) #define vqdmlahq_m_n_s8(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s8(__a, __b, __c, __p) #define vqdmlahq_m_n_s32(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s32(__a, __b, __c, __p) #define vqdmlahq_m_n_s16(__a, __b, __c, __p) __arm_vqdmlahq_m_n_s16(__a, __b, __c, __p) @@ -2024,8 +2024,6 @@ #define vmlaldavaq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaq_p_u16(__a, __b, __c, __p) #define vmlaldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s32(__a, __b, __c, __p) #define vmlaldavaxq_p_s16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_s16(__a, __b, __c, __p) -#define vmlaldavaxq_p_u32(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u32(__a, __b, __c, __p) -#define vmlaldavaxq_p_u16(__a, __b, __c, __p) __arm_vmlaldavaxq_p_u16(__a, __b, __c, __p) #define vmlsldavaq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaq_p_s32(__a, __b, __c, __p) #define vmlsldavaq_p_s16(__a, __b, __c, __p) __arm_vmlsldavaq_p_s16(__a, __b, __c, __p) #define vmlsldavaxq_p_s32(__a, __b, __c, __p) __arm_vmlsldavaxq_p_s32(__a, __b, __c, __p) @@ -6961,27 +6959,6 @@ __arm_vrev64q_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p) __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __builtin_mve_vqrdmlashq_n_uv16qi (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __builtin_mve_vqrdmlahq_n_uv16qi (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq_n_u8 (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __builtin_mve_vqdmlahq_n_uv16qi (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p) { return __builtin_mve_vmvnq_m_uv16qi (__inactive, __a, __p); @@ -7424,6 +7401,13 @@ __arm_vqrdmlashq_n_s8 
(int8x16_t __a, int8x16_t __b, int8_t __c) __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c) +{ + return __builtin_mve_vqdmlashq_n_sv16qi (__a, __b, __c); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c) { return __builtin_mve_vqrdmlahq_n_sv16qi (__a, __b, __c); @@ -7557,27 +7541,6 @@ __arm_vrev64q_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p) __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __builtin_mve_vqrdmlashq_n_uv8hi (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __builtin_mve_vqrdmlahq_n_uv8hi (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __builtin_mve_vqdmlahq_n_uv8hi (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p) { return __builtin_mve_vmvnq_m_uv8hi (__inactive, __a, __p); @@ -8019,6 +7982,13 @@ __arm_vqrdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return __builtin_mve_vqdmlashq_n_sv8hi (__a, __b, __c); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) { return __builtin_mve_vqrdmlahq_n_sv8hi (__a, __b, __c); @@ -8152,27 +8122,6 @@ __arm_vrev64q_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p) __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __builtin_mve_vqrdmlashq_n_uv4si (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __builtin_mve_vqrdmlahq_n_uv4si (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __builtin_mve_vqdmlahq_n_uv4si (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p) { return __builtin_mve_vmvnq_m_uv4si (__inactive, __a, __p); @@ -8614,6 +8563,13 @@ __arm_vqrdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return 
__builtin_mve_vqdmlashq_n_sv4si (__a, __b, __c); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) { return __builtin_mve_vqrdmlahq_n_sv4si (__a, __b, __c); @@ -11141,6 +11097,27 @@ __arm_vqrdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_ __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m_n_s8 (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vqdmlashq_m_n_sv16qi (__a, __b, __c, __p); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vqdmlashq_m_n_sv8hi (__a, __b, __c, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p) +{ + return __builtin_mve_vqdmlashq_m_n_sv4si (__a, __b, __c, __p); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlsdhq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p) { return __builtin_mve_vqrdmlsdhq_m_sv16qi (__inactive, __a, __b, __p); @@ -11811,20 +11788,6 @@ __arm_vmlaldavaxq_p_s16 (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t return __builtin_mve_vmlaldavaxq_p_sv8hi (__a, __b, __c, __p); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavaxq_p_u32 (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavaxq_p_uv4si (__a, __b, __c, __p); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavaxq_p_u16 (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p) -{ - return __builtin_mve_vmlaldavaxq_p_uv8hi (__a, __b, __c, __p); -} - __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmlsldavaq_p_s32 (int64_t __a, int32x4_t __b, int32x4_t __c, mve_pred16_t __p) @@ -13993,64 +13956,56 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value) { - __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value); - __builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr); + *__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value) { - __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value); - __builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr); + *__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p) { - __builtin_mve_vstrdq_scatter_base_wb_p_sv2di 
(*__addr, __offset, __value, __p); - __builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p); + *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p) { - __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p); - __builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p); + *__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p) { - __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p); - __builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p) { - __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p); - __builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value) { - __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value); - __builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value); } __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value) { - __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value); - __builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value); } __extension__ extern __inline uint8x16_t @@ -17012,6 +16967,13 @@ __arm_vcvtnq_u16_f16 (float16x8_t __a) return __builtin_mve_vcvtnq_uv8hi (__a); } +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vcvtnq_u32_f32 (float32x4_t __a) +{ + return __builtin_mve_vcvtnq_uv4si (__a); +} + __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vcvtmq_u16_f16 (float16x8_t __a) @@ -19158,16 +19120,14 @@ __extension__ extern __inline void __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value) { - __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value); - __builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value); } __extension__ extern __inline void 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p) { - __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p); - __builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p); + *__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p); } __extension__ extern __inline float16x8_t @@ -23742,27 +23702,6 @@ __arm_vrev64q_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p) __extension__ extern __inline uint8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __arm_vqrdmlashq_n_u8 (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __arm_vqrdmlahq_n_u8 (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq (uint8x16_t __a, uint8x16_t __b, uint8_t __c) -{ - return __arm_vqdmlahq_n_u8 (__a, __b, __c); -} - -__extension__ extern __inline uint8x16_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (uint8x16_t __inactive, uint8x16_t __a, mve_pred16_t __p) { return __arm_vmvnq_m_u8 (__inactive, __a, __p); @@ -24204,6 +24143,13 @@ __arm_vqrdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c) __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq (int8x16_t __a, int8x16_t __b, int8_t __c) +{ + return __arm_vqdmlashq_n_s8 (__a, __b, __c); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq (int8x16_t __a, int8x16_t __b, int8_t __c) { return __arm_vqrdmlahq_n_s8 (__a, __b, __c); @@ -24337,27 +24283,6 @@ __arm_vrev64q_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p) __extension__ extern __inline uint16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __arm_vqrdmlashq_n_u16 (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __arm_vqrdmlahq_n_u16 (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq (uint16x8_t __a, uint16x8_t __b, uint16_t __c) -{ - return __arm_vqdmlahq_n_u16 (__a, __b, __c); -} - -__extension__ extern __inline uint16x8_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (uint16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p) { return __arm_vmvnq_m_u16 (__inactive, __a, __p); @@ -24799,6 +24724,13 @@ __arm_vqrdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c) __extension__ extern __inline int16x8_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq (int16x8_t __a, int16x8_t __b, int16_t __c) +{ + return __arm_vqdmlashq_n_s16 (__a, __b, __c); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq (int16x8_t __a, int16x8_t __b, int16_t __c) { return 
__arm_vqrdmlahq_n_s16 (__a, __b, __c); @@ -24932,27 +24864,6 @@ __arm_vrev64q_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p) __extension__ extern __inline uint32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlashq (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __arm_vqrdmlashq_n_u32 (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqrdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __arm_vqrdmlahq_n_u32 (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vqdmlahq (uint32x4_t __a, uint32x4_t __b, uint32_t __c) -{ - return __arm_vqdmlahq_n_u32 (__a, __b, __c); -} - -__extension__ extern __inline uint32x4_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmvnq_m (uint32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p) { return __arm_vmvnq_m_u32 (__inactive, __a, __p); @@ -25394,6 +25305,13 @@ __arm_vqrdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c) __extension__ extern __inline int32x4_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq (int32x4_t __a, int32x4_t __b, int32_t __c) +{ + return __arm_vqdmlashq_n_s32 (__a, __b, __c); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlahq (int32x4_t __a, int32x4_t __b, int32_t __c) { return __arm_vqrdmlahq_n_s32 (__a, __b, __c); @@ -27921,6 +27839,27 @@ __arm_vqrdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p) __extension__ extern __inline int8x16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m (int8x16_t __a, int8x16_t __b, int8_t __c, mve_pred16_t __p) +{ + return __arm_vqdmlashq_m_n_s8 (__a, __b, __c, __p); +} + +__extension__ extern __inline int16x8_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m (int16x8_t __a, int16x8_t __b, int16_t __c, mve_pred16_t __p) +{ + return __arm_vqdmlashq_m_n_s16 (__a, __b, __c, __p); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +__arm_vqdmlashq_m (int32x4_t __a, int32x4_t __b, int32_t __c, mve_pred16_t __p) +{ + return __arm_vqdmlashq_m_n_s32 (__a, __b, __c, __p); +} + +__extension__ extern __inline int8x16_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vqrdmlsdhq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p) { return __arm_vqrdmlsdhq_m_s8 (__inactive, __a, __b, __p); @@ -28591,20 +28530,6 @@ __arm_vmlaldavaxq_p (int64_t __a, int16x8_t __b, int16x8_t __c, mve_pred16_t __p return __arm_vmlaldavaxq_p_s16 (__a, __b, __c, __p); } -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavaxq_p (uint64_t __a, uint32x4_t __b, uint32x4_t __c, mve_pred16_t __p) -{ - return __arm_vmlaldavaxq_p_u32 (__a, __b, __c, __p); -} - -__extension__ extern __inline uint64_t -__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -__arm_vmlaldavaxq_p (uint64_t __a, uint16x8_t __b, uint16x8_t __c, mve_pred16_t __p) -{ - return __arm_vmlaldavaxq_p_u16 (__a, __b, __c, __p); -} - __extension__ extern __inline int64_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) __arm_vmlsldavaq_p (int64_t __a, int32x4_t __b, 
int32x4_t __c, mve_pred16_t __p) @@ -35651,6 +35576,7 @@ enum { short: __ARM_mve_type_int_n, \ int: __ARM_mve_type_int_n, \ long: __ARM_mve_type_int_n, \ + double: __ARM_mve_type_fp_n, \ long long: __ARM_mve_type_int_n, \ unsigned char: __ARM_mve_type_int_n, \ unsigned short: __ARM_mve_type_int_n, \ @@ -35723,6 +35649,8 @@ extern void *__ARM_undef; _Generic(param, type: param, default: *(type *)__ARM_undef) #define __ARM_mve_coerce1(param, type) \ _Generic(param, type: param, const type: param, default: *(type *)__ARM_undef) +#define __ARM_mve_coerce2(param, type) \ + _Generic(param, type: param, float16_t: param, float32_t: param, default: *(type *)__ARM_undef) #if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */ @@ -35939,14 +35867,14 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_f16 (__ARM_mve_coerce(p0, float16x8_t), __ARM_mve_coerce(p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_f32 (__ARM_mve_coerce(p0, float32x4_t), __ARM_mve_coerce(p1, float32x4_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_n_f32 
(__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -35997,8 +35925,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vmulq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmulq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmulq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36029,8 +35957,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpeqq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpeqq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpeqq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36069,8 +35997,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpeqq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \ int 
(*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpeqq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpeqq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpeqq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpgtq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36083,8 +36011,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vcmpleq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36097,8 +36025,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vcmpltq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36111,8 +36039,8 @@ extern void *__ARM_undef; int 
(*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpltq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpltq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vcmpneq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36123,8 +36051,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vcmpneq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcmpneq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcmpneq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \ @@ -36179,8 +36107,8 @@ extern void *__ARM_undef; #define __arm_vmaxnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_f32 (__ARM_mve_coerce2(__p0, double), 
__ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vmaxnmq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36191,14 +36119,14 @@ extern void *__ARM_undef; #define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vmaxnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vminnmaq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36209,8 +36137,8 @@ extern void *__ARM_undef; #define __arm_vminnmavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vbrsrq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -36232,8 +36160,8 @@ extern void *__ARM_undef; #define __arm_vsubq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), 
__ARM_mve_coerce2(__p1, double)), \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ @@ -36252,8 +36180,8 @@ extern void *__ARM_undef; #define __arm_vminnmvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t)), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t)));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t)), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t)));}) #define __arm_vshlq_r(p0,p1) ({ __typeof(p0) __p0 = (p0); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \ @@ -36782,10 +36710,15 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) + +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ + int 
(*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36793,10 +36726,7 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vmlasq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -36815,10 +36745,7 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int 
(*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37011,8 +36938,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgtq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgtq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgtq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgtq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) @@ -37027,8 +36954,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpleq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpleq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpltq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37041,8 +36968,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s16 
(__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpltq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpltq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcmpneq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37061,8 +36988,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t), p2), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t), p2), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vcmpneq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpneq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2));}) #define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37116,8 +37043,8 @@ extern void *__ARM_undef; __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t)), \ int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t)));}) @@ -37132,8 +37059,8 @@ extern void *__ARM_undef; __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double)));}) #define __arm_vmaxnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37156,14 +37083,14 @@ extern void *__ARM_undef; #define __arm_vmaxnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vmaxnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vmaxnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vmaxnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vminnmaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37174,14 +37101,14 @@ extern void *__ARM_undef; #define __arm_vminnmavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int 
(*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmavq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmavq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vminnmvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vminnmvq_p_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vminnmvq_p_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #define __arm_vrndnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37248,8 +37175,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t)), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t)));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double)), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double)));}) #define __arm_vrshrnbq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37353,8 +37280,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t), p2), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t), p2), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vcmpgeq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t), p2), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16_t), p2), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32_t), p2), \ + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f16 
(__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce2(__p1, double), p2), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vcmpgeq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce2(__p1, double), p2), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcmpgeq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcmpgeq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) @@ -37389,8 +37316,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, int), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, int), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, int), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vandq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37531,15 +37458,15 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vfmaq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vfmaq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int 
(*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmaq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vfmasq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vfmasq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vfmsq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37580,8 +37507,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -37614,8 +37541,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t), p3), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), 
__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t), p3), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vsubq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_m_n_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vorrq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -38113,8 +38040,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vaddq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vaddq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vaddq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vandq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -38248,8 +38175,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vmulq_x_n_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t), p3), \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vmulq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vmulq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, 
double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vmulq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vnegq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \ @@ -38337,8 +38264,8 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vsubq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \ int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vsubq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3), \ - int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16_t), p3), \ - int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32_t), p3));}) + int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce2(__p2, double), p3), \ + int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_fp_n]: __arm_vsubq_x_n_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce2(__p2, double), p3));}) #define __arm_vcmulq_rot90_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -38370,8 +38297,8 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vsetq_lane_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vsetq_lane_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint64x2_t]: __arm_vsetq_lane_u64 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint64x2_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce(__p0, float16_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \ - int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce(__p0, float32_t), __ARM_mve_coerce(__p1, float32x4_t), p2));}) + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float16x8_t]: __arm_vsetq_lane_f16 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float16x8_t), p2), \ + int (*)[__ARM_mve_type_fp_n][__ARM_mve_type_float32x4_t]: __arm_vsetq_lane_f32 (__ARM_mve_coerce2(__p0, double), __ARM_mve_coerce(__p1, float32x4_t), p2));}) #else /* MVE Integer. 
*/ @@ -38895,12 +38822,12 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vaddq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vaddq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vaddq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32_t)), \ - int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8_t)), \ - int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32_t)));}) + int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vaddq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int)));}) #define __arm_vandq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -39254,10 +39181,15 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, 
uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) + +#define __arm_vqdmlashq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vqrdmlahq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -39265,10 +39197,7 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vqrdmladhxq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -39399,10 +39328,7 @@ extern void *__ARM_undef; _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t)), \ 
int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t)), \ - int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)), \ - int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8_t)), \ - int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16_t)), \ - int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32_t)));}) + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlahq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t)));}) #define __arm_vqdmlsdhq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -40800,6 +40726,14 @@ extern void *__ARM_undef; int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqrdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) +#define __arm_vqdmlashq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ + __typeof(p1) __p1 = (p1); \ + __typeof(p2) __p2 = (p2); \ + _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ + int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8_t), p3), \ + int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16_t), p3), \ + int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vqdmlashq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32_t), p3));}) + #define __arm_vqrshlq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -41057,9 +40991,7 @@ extern void *__ARM_undef; __typeof(p2) __p2 = (p2); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vmlaldavaxq_p_s16 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \ - int 
(*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vmlaldavaxq_p_u16 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vmlaldavaxq_p_u32 (__ARM_mve_coerce(__p0, uint64_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vmlaldavaxq_p_s32 (__ARM_mve_coerce(__p0, int64_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3));}) #define __arm_vmlsldavaq_p(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ @@ -41679,16 +41611,16 @@ extern void *__ARM_undef; #define __arm_vmaxavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));}) #define __arm_vmaxavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));}) #define __arm_vmaxq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -41703,36 +41635,36 @@ extern void *__ARM_undef; #define __arm_vmaxvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - 
int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_u32 (__p0,__ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vmaxvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vmaxvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vmaxvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vmaxvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vmaxvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vmaxvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vmaxvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));}) #define __arm_vminavq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int 
(*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)));}) #define __arm_vminavq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, int32x4_t), p2));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminavq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminavq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminavq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2));}) #define __arm_vminq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \ __typeof(p2) __p2 = (p2); \ @@ -41747,22 +41679,22 @@ extern void *__ARM_undef; #define __arm_vminvq(p0,p1) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t)), \ - int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t)));}) + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t)), \ + int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t)));}) #define __arm_vminvq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \ __typeof(p1) __p1 = (p1); \ 
_Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__ARM_mve_coerce(__p0, int8_t), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__ARM_mve_coerce(__p0, int16_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__ARM_mve_coerce(__p0, int32_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__ARM_mve_coerce(__p0, uint8_t), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__ARM_mve_coerce(__p0, uint16_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__ARM_mve_coerce(__p0, uint32_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int8x16_t]: __arm_vminvq_p_s8 (__p0, __ARM_mve_coerce(__p1, int8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int16x8_t]: __arm_vminvq_p_s16 (__p0, __ARM_mve_coerce(__p1, int16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_int32x4_t]: __arm_vminvq_p_s32 (__p0, __ARM_mve_coerce(__p1, int32x4_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint8x16_t]: __arm_vminvq_p_u8 (__p0, __ARM_mve_coerce(__p1, uint8x16_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint16x8_t]: __arm_vminvq_p_u16 (__p0, __ARM_mve_coerce(__p1, uint16x8_t), p2), \
+ int (*)[__ARM_mve_type_int_n][__ARM_mve_type_uint32x4_t]: __arm_vminvq_p_u32 (__p0, __ARM_mve_coerce(__p1, uint32x4_t), p2));})

#define __arm_vmladavaq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
__typeof(p1) __p1 = (p1); \
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index 753e40a..f38926f 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -312,9 +312,6 @@ VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di)
VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlashq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqrdmlahq_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqdmlahq_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaq_n_u, v16qi, v8hi, v4si)
@@ -384,6 +381,7 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmladhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhxq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlsdhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlahq_n_s, v16qi, v8hi, v4si)
+VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmlashq_n_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhxq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqdmladhq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmlsdavaxq_s, v16qi, v8hi, v4si)
@@ -574,6 +572,7 @@ VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s, v16qi, v8hi, v4si)
+VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi, v8hi, v4si)
@@ -615,7 +614,6 @@ VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrq_m_n_s, v16qi, v8hi, v4si)
VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshlq_m_n_s, v16qi, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulltq_poly_m_p, v16qi, v8hi)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmullbq_poly_m_p, v16qi, v8hi)
-VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaxq_p_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaq_p_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrntq_m_n_u, v8hi, v4si)
VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrnbq_m_n_u, v8hi, v4si)
@@ -828,19 +826,9 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si)
-VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf)
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di)
-VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di)
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si)
-VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf)
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di)
-VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di)
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index ff229aa..789e333 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -454,10 +454,13 @@
(define_memory_constraint "Uj"
"@internal
- In ARM/Thumb-2 state an VFP load/store address which does not support
- writeback at all (eg vldr.16)."
+ In ARM/Thumb-2 state a VFP load/store address that supports writeback
+ for Neon but not for MVE"
(and (match_code "mem")
- (match_test "TARGET_32BIT && arm_coproc_mem_operand_no_writeback (op)")))
+ (match_test "TARGET_32BIT")
+ (match_test "TARGET_HAVE_MVE
+ ? arm_coproc_mem_operand_no_writeback (op)
+ : neon_vector_mem_operand (op, 2, true)")))

(define_memory_constraint "Uy"
"@internal
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 0bc9eba..f934872 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -66,14 +66,6 @@
;; Integer and float modes supported by Neon and IWMMXT.
(define_mode_iterator VALL [V2DI V2SI V4HI V8QI V2SF V4SI V8HI V16QI V4SF])

-;; Integer and float modes supported by Neon, IWMMXT and MVE, used by
-;; arithmetic epxand patterns.
-(define_mode_iterator VNIM [V16QI V8HI V4SI V4SF])
-
-;; Integer and float modes supported by Neon and IWMMXT but not MVE, used by
-;; arithmetic epxand patterns.
-(define_mode_iterator VNINOTM [V2SI V4HI V8QI V2SF V2DI])
-
;; Integer and float modes supported by Neon, IWMMXT and MVE.
(define_mode_iterator VNIM1 [V16QI V8HI V4SI V4SF V2DI])
@@ -267,6 +259,16 @@
(define_mode_iterator VBFCVT [V4BF V8BF])
(define_mode_iterator VBFCVTM [V2SI SF])

+;; MVE mode iterator.
+(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF])
+(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF])
+(define_mode_iterator MVE_0 [V8HF V4SF])
+(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI])
+(define_mode_iterator MVE_3 [V16QI V8HI])
+(define_mode_iterator MVE_2 [V16QI V8HI V4SI])
+(define_mode_iterator MVE_5 [V8HI V4SI])
+(define_mode_iterator MVE_6 [V8HI V4SI])
+
;;----------------------------------------------------------------------------
;; Code iterators
;;----------------------------------------------------------------------------
@@ -901,6 +903,35 @@
(define_mode_attr cde_suffix [(SI "") (DI "d")])
(define_mode_attr cde_dest [(SI "%0") (DI "%0, %H0")])

+;;MVE mode attribute.
+(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI")
+ (V4SF "V4SI")])
+(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")])
+
+(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")])
+(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")])
+(define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf")
+ (V8HF "Rd") (V4SF "Rf")])
+(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")])
+
+(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15")
+ (V4SI "mve_imm_31")])
+(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")])
+(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16")
+ (V4SI "mve_imm_32")
+ (V8HF "mve_imm_16") (V4SF "mve_imm_32")])
+(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")])
+
+(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")])
+(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")])
+
+(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h")
+ (V4SF "w")])
+(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32")
+ (V8HF "u16") (V4SF "32")])
+(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w")
+ (V8HF "=w") (V4SF "=&w")])
+
;;----------------------------------------------------------------------------
;; Code attributes
;;----------------------------------------------------------------------------
@@ -1181,6 +1212,188 @@
(define_int_attr mmla_sfx [(UNSPEC_MATMUL_S "s8") (UNSPEC_MATMUL_U "u8")
 (UNSPEC_MATMUL_US "s8")])

+;;MVE int attribute.
+(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") + (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u") + (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s") + (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u") + (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s") + (VADDVQ_U "u") (VADDVQ_S "s") (VADDVQ_U "u") + (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s") + (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") + (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") + (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") + (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") + (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") + (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") + (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") + (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u") + (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s") + (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") + (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") + (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") + (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s") + (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u") + (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") + (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") + (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") + (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") + (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u") + (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") + (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u") + (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u") + (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u") + (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s") + (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u") + (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s") + (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u") + (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s") + (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s") + (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u") + (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s") + (VQRSHLQ_U "u") (VQSHLQ_N_S "s") (VQSHLQ_N_U "u") + (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s") + (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u") + (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s") + (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u") + (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s") + (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u") + (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s") + (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u") + (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s") + (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u") + (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u") + (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s") + (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u") + (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u") + (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s") + (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s") + (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u") + (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u") + (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s") + (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s") + (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u") + (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s") + (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u") + (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s") + (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u") + (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s") + (VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u") + (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s") + (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U 
"u") + (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s") + (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u") + (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s") + (VPSELQ_U "u") (VQDMLAHQ_N_S "s") + (VQDMLASHQ_N_S "s") + (VQRDMLAHQ_N_S "s") + (VQRDMLASHQ_N_S "s") + (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u") + (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s") + (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u") + (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s") + (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u") + (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s") + (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s") + (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s") + (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s") + (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s") + (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s") + (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u") + (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u") + (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u") + (VREV16Q_M_S "s") (VREV16Q_M_U "u") + (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u") + (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u") + (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u") + (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u") + (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s") + (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u") + (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s") + (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s") + (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u") + (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u") + (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u") + (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u") + (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s") + (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s") + (VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u") + (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u") + (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s") + (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u") + (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s") + (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s") + (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u") + (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u") + (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u") + (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u") + (VQDMLASHQ_M_N_S "s") + (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s") + (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u") + (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u") + (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s") + (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s") + (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u") + (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u") + (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u") + (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u") + (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u") + (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s") + (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u") + (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u") + (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u") + (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s") + (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s") + (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u") + (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s") + (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u") + (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u") + (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s") + (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u") + (VSHRNTQ_M_N_U "u") 
(VSHRNTQ_M_N_S "s") + (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u") + (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u") + (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u") + (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u") + (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s") + (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S "s") + (VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u") + (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u") + (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") + (VMLALDAVAXQ_P_S "s") + (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") + (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s") + (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u") + (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s") + (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u") + (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s") + (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") + (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s") + (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") + (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") + (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") + (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s") + (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u") + (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s") + (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u") + (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") + (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") + (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") + (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") + (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") + (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s") + (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u") + (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") + (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s") + (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s") + (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s") + (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") + (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") + (VSHLCQ_M_U "u")]) + +(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") + (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") + (VCTP32Q_M "32") (VCTP64Q_M "64")]) ;; Both kinds of return insn. (define_code_iterator RETURNS [return simple_return]) @@ -1256,3 +1469,249 @@ ;; An iterator for CDE MVE accumulator/non-accumulator versions. (define_int_attr a [(UNSPEC_VCDE "") (UNSPEC_VCDEA "a")]) + +;; MVE int iterator. 
+(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U]) +(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S]) +(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U]) +(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) +(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) +(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) +(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S]) +(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) +(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) +(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) +(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S]) +(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) +(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S]) +(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) +(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) +(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) +(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) +(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) +(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) +(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) +(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) +(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) +(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) +(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U]) +(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S]) +(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U]) +(define_int_iterator VABDQ [VABDQ_S VABDQ_U]) +(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) +(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) +(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) +(define_int_iterator VANDQ [VANDQ_U VANDQ_S]) +(define_int_iterator VBICQ [VBICQ_S VBICQ_U]) +(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) +(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) +(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) +(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) +(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) +(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) +(define_int_iterator VEORQ [VEORQ_U VEORQ_S]) +(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U]) +(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S]) +(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U]) +(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S]) +(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S]) +(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S]) +(define_int_iterator VMINQ [VMINQ_S VMINQ_U]) +(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S]) +(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S]) +(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U]) +(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S]) +(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S]) +(define_int_iterator VMULQ [VMULQ_U VMULQ_S]) +(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S]) +(define_int_iterator VORNQ [VORNQ_U VORNQ_S]) +(define_int_iterator VORRQ [VORRQ_S VORRQ_U]) +(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S]) +(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U]) +(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U]) +(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U]) +(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U]) +(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U]) +(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S]) +(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S]) +(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U]) +(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U]) +(define_int_iterator 
VRMULHQ [VRMULHQ_S VRMULHQ_U]) +(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U]) +(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S]) +(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U]) +(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S]) +(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U]) +(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U]) +(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U]) +(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U]) +(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U]) +(define_int_iterator VMLALDAVQ [VMLALDAVQ_U VMLALDAVQ_S]) +(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S]) +(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S]) +(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U]) +(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S]) +(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S]) +(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S]) +(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U]) +(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S]) +(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S]) +(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U]) +(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U]) +(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U]) +(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S]) +(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U]) +(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U]) +(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U]) +(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U]) +(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U]) +(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U]) +(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U]) +(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U]) +(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U]) +(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U]) +(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U]) +(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U]) +(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U]) +(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U]) +(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U]) +(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U]) +(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U]) +(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U]) +(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S]) +(define_int_iterator VQDMLASHQ_N [VQDMLASHQ_N_S]) +(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S]) +(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S]) +(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U]) +(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U]) +(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U]) +(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U]) +(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U]) +(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U]) +(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U]) +(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S]) +(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U]) +(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S]) +(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S]) +(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S]) +(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U]) +(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U]) +(define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U]) +(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S]) +(define_int_iterator 
VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S]) +(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S]) +(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U]) +(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S]) +(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S]) +(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U]) +(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S]) +(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S]) +(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S]) +(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S]) +(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U]) +(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U]) +(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U]) +(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U]) +(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U]) +(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U]) +(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S]) +(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U]) +(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S]) +(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U]) +(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U]) +(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U]) +(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S]) +(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S]) +(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U]) +(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S]) +(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U]) +(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U]) +(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S]) +(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U]) +(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U]) +(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U]) +(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U]) +(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U]) +(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S]) +(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S]) +(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U]) +(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S]) +(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S]) +(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S]) +(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S]) +(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S]) +(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S]) +(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S]) +(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U]) +(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S]) +(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S]) +(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S]) +(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U]) +(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U]) +(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S]) +(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S]) +(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S]) +(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S]) +(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S]) +(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S]) +(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U]) +(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U]) +(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U]) 
+(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U]) +(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U]) +(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U]) +(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U]) +(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U]) +(define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S]) +(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_S]) +(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S]) +(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U]) +(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S]) +(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U]) +(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S]) +(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) +(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S]) +(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) +(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) +(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) +(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U]) +(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U]) +(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U]) +(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U]) +(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U]) +(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) +(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U]) +(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U]) +(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) +(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U]) +(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U]) +(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) +(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) +(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) +(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) +(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) +(define_int_iterator VST1Q [VST1Q_S VST1Q_U]) +(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U]) +(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U]) +(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U]) +(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U]) +(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U]) +(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U]) +(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) +(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) +(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) +(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) +(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) +(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) +(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) +(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S]) +(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S]) +(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S]) +(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S]) +(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S]) +(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S]) +(define_int_iterator VADCQ [VADCQ_U VADCQ_S]) +(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S]) +(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48]) +(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48]) +(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 465b39a..ecbaaa9 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -17,654 +17,6 @@ ;; along with GCC; see the file COPYING3. 
If not see ;; <http://www.gnu.org/licenses/>. -(define_mode_iterator MVE_types [V16QI V8HI V4SI V2DI TI V8HF V4SF V2DF]) -(define_mode_iterator MVE_VLD_ST [V16QI V8HI V4SI V8HF V4SF]) -(define_mode_iterator MVE_0 [V8HF V4SF]) -(define_mode_iterator MVE_1 [V16QI V8HI V4SI V2DI]) -(define_mode_iterator MVE_3 [V16QI V8HI]) -(define_mode_iterator MVE_2 [V16QI V8HI V4SI]) -(define_mode_iterator MVE_5 [V8HI V4SI]) -(define_mode_iterator MVE_6 [V8HI V4SI]) - -(define_c_enum "unspec" [VST4Q VRNDXQ_F VRNDQ_F VRNDPQ_F VRNDNQ_F VRNDMQ_F - VRNDAQ_F VREV64Q_F VNEGQ_F VDUPQ_N_F VABSQ_F VREV32Q_F - VCVTTQ_F32_F16 VCVTBQ_F32_F16 VCVTQ_TO_F_S VQNEGQ_S - VCVTQ_TO_F_U VREV16Q_S VREV16Q_U VADDLVQ_S VMVNQ_N_S - VMVNQ_N_U VCVTAQ_S VCVTAQ_U VREV64Q_S VREV64Q_U - VQABSQ_S VNEGQ_S VMVNQ_S VMVNQ_U VDUPQ_N_U VDUPQ_N_S - VCLZQ_U VCLZQ_S VCLSQ_S VADDVQ_S VADDVQ_U VABSQ_S - VREV32Q_U VREV32Q_S VMOVLTQ_U VMOVLTQ_S VMOVLBQ_S - VMOVLBQ_U VCVTQ_FROM_F_S VCVTQ_FROM_F_U VCVTPQ_S - VCVTPQ_U VCVTNQ_S VCVTNQ_U VCVTMQ_S VCVTMQ_U - VADDLVQ_U VCTP8Q VCTP16Q VCTP32Q VCTP64Q VPNOT - VCREATEQ_F VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U VBRSRQ_N_F - VSUBQ_N_F VCREATEQ_U VCREATEQ_S VSHRQ_N_S VSHRQ_N_U - VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U VADDLVQ_P_S - VADDLVQ_P_U VCMPNEQ_U VCMPNEQ_S VSHLQ_S VSHLQ_U VABDQ_S - VADDQ_N_S VADDVAQ_S VADDVQ_P_S VANDQ_S VBICQ_S - VBRSRQ_N_S VCADDQ_ROT270_S VCADDQ_ROT90_S VCMPEQQ_S - VCMPEQQ_N_S VCMPNEQ_N_S VEORQ_S VHADDQ_S VHADDQ_N_S - VHSUBQ_S VHSUBQ_N_S VMAXQ_S VMAXVQ_S VMINQ_S VMINVQ_S - VMLADAVQ_S VMULHQ_S VMULLBQ_INT_S VMULLTQ_INT_S VMULQ_S - VMULQ_N_S VORNQ_S VORRQ_S VQADDQ_S VQADDQ_N_S VQRSHLQ_S - VQRSHLQ_N_S VQSHLQ_S VQSHLQ_N_S VQSHLQ_R_S VQSUBQ_S - VQSUBQ_N_S VRHADDQ_S VRMULHQ_S VRSHLQ_S VRSHLQ_N_S - VRSHRQ_N_S VSHLQ_N_S VSHLQ_R_S VSUBQ_S VSUBQ_N_S - VABDQ_U VADDQ_N_U VADDVAQ_U VADDVQ_P_U VANDQ_U VBICQ_U - VBRSRQ_N_U VCADDQ_ROT270_U VCADDQ_ROT90_U VCMPEQQ_U - VCMPEQQ_N_U VCMPNEQ_N_U VEORQ_U VHADDQ_U VHADDQ_N_U - VHSUBQ_U VHSUBQ_N_U VMAXQ_U VMAXVQ_U VMINQ_U VMINVQ_U - VMLADAVQ_U VMULHQ_U VMULLBQ_INT_U VMULLTQ_INT_U VMULQ_U - VMULQ_N_U VORNQ_U VORRQ_U VQADDQ_U VQADDQ_N_U VQRSHLQ_U - VQRSHLQ_N_U VQSHLQ_U VQSHLQ_N_U VQSHLQ_R_U VQSUBQ_U - VQSUBQ_N_U VRHADDQ_U VRMULHQ_U VRSHLQ_U VRSHLQ_N_U - VRSHRQ_N_U VSHLQ_N_U VSHLQ_R_U VSUBQ_U VSUBQ_N_U - VCMPGEQ_N_S VCMPGEQ_S VCMPGTQ_N_S VCMPGTQ_S VCMPLEQ_N_S - VCMPLEQ_S VCMPLTQ_N_S VCMPLTQ_S VHCADDQ_ROT270_S - VHCADDQ_ROT90_S VMAXAQ_S VMAXAVQ_S VMINAQ_S VMINAVQ_S - VMLADAVXQ_S VMLSDAVQ_S VMLSDAVXQ_S VQDMULHQ_N_S - VQDMULHQ_S VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S - VCMPCSQ_N_U VCMPCSQ_U VCMPHIQ_N_U VCMPHIQ_U VABDQ_M_S - VABDQ_M_U VABDQ_F VADDQ_N_F VANDQ_F VBICQ_F - VCADDQ_ROT270_F VCADDQ_ROT90_F VCMPEQQ_F VCMPEQQ_N_F - VCMPGEQ_F VCMPGEQ_N_F VCMPGTQ_F VCMPGTQ_N_F VCMPLEQ_F - VCMPLEQ_N_F VCMPLTQ_F VCMPLTQ_N_F VCMPNEQ_F VCMPNEQ_N_F - VCMULQ_F VCMULQ_ROT180_F VCMULQ_ROT270_F VCMULQ_ROT90_F - VEORQ_F VMAXNMAQ_F VMAXNMAVQ_F VMAXNMQ_F VMAXNMVQ_F - VMINNMAQ_F VMINNMAVQ_F VMINNMQ_F VMINNMVQ_F VMULQ_F - VMULQ_N_F VORNQ_F VORRQ_F VSUBQ_F VADDLVAQ_U - VADDLVAQ_S VBICQ_N_U VBICQ_N_S VCTP8Q_M VCTP16Q_M - VCTP32Q_M VCTP64Q_M VCVTBQ_F16_F32 VCVTTQ_F16_F32 - VMLALDAVQ_U VMLALDAVXQ_U VMLALDAVXQ_S VMLALDAVQ_S - VMLSLDAVQ_S VMLSLDAVXQ_S VMOVNBQ_U VMOVNBQ_S - VMOVNTQ_U VMOVNTQ_S VORRQ_N_S VORRQ_N_U VQDMULLBQ_N_S - VQDMULLBQ_S VQDMULLTQ_N_S VQDMULLTQ_S VQMOVNBQ_U - VQMOVNBQ_S VQMOVUNBQ_S VQMOVUNTQ_S VRMLALDAVHXQ_S - VRMLSLDAVHQ_S VRMLSLDAVHXQ_S VSHLLBQ_S - VSHLLBQ_U VSHLLTQ_U VSHLLTQ_S VQMOVNTQ_U VQMOVNTQ_S - VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_U VSHLLTQ_N_S - VRMLALDAVHQ_U VRMLALDAVHQ_S VMULLTQ_POLY_P - 
VMULLBQ_POLY_P VBICQ_M_N_S VBICQ_M_N_U VCMPEQQ_M_F - VCVTAQ_M_S VCVTAQ_M_U VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U - VQRSHRNBQ_N_U VQRSHRNBQ_N_S VQRSHRUNBQ_N_S - VRMLALDAVHAQ_S VABAVQ_S VABAVQ_U VSHLCQ_S VSHLCQ_U - VRMLALDAVHAQ_U VABSQ_M_S VADDVAQ_P_S VADDVAQ_P_U - VCLSQ_M_S VCLZQ_M_S VCLZQ_M_U VCMPCSQ_M_N_U - VCMPCSQ_M_U VCMPEQQ_M_N_S VCMPEQQ_M_N_U VCMPEQQ_M_S - VCMPEQQ_M_U VCMPGEQ_M_N_S VCMPGEQ_M_S VCMPGTQ_M_N_S - VCMPGTQ_M_S VCMPHIQ_M_N_U VCMPHIQ_M_U VCMPLEQ_M_N_S - VCMPLEQ_M_S VCMPLTQ_M_N_S VCMPLTQ_M_S VCMPNEQ_M_N_S - VCMPNEQ_M_N_U VCMPNEQ_M_S VCMPNEQ_M_U VDUPQ_M_N_S - VDUPQ_M_N_U VDWDUPQ_N_U VDWDUPQ_WB_U VIWDUPQ_N_U - VIWDUPQ_WB_U VMAXAQ_M_S VMAXAVQ_P_S VMAXVQ_P_S - VMAXVQ_P_U VMINAQ_M_S VMINAVQ_P_S VMINVQ_P_S VMINVQ_P_U - VMLADAVAQ_S VMLADAVAQ_U VMLADAVQ_P_S VMLADAVQ_P_U - VMLADAVXQ_P_S VMLAQ_N_S VMLAQ_N_U VMLASQ_N_S VMLASQ_N_U - VMLSDAVQ_P_S VMLSDAVXQ_P_S VMVNQ_M_S VMVNQ_M_U - VNEGQ_M_S VPSELQ_S VPSELQ_U VQABSQ_M_S VQDMLAHQ_N_S - VQDMLAHQ_N_U VQNEGQ_M_S VQRDMLADHQ_S VQRDMLADHXQ_S - VQRDMLAHQ_N_S VQRDMLAHQ_N_U VQRDMLASHQ_N_S - VQRDMLASHQ_N_U VQRDMLSDHQ_S VQRDMLSDHXQ_S VQRSHLQ_M_N_S - VQRSHLQ_M_N_U VQSHLQ_M_R_S VQSHLQ_M_R_U VREV64Q_M_S - VREV64Q_M_U VRSHLQ_M_N_S VRSHLQ_M_N_U VSHLQ_M_R_S - VSHLQ_M_R_U VSLIQ_N_S VSLIQ_N_U VSRIQ_N_S VSRIQ_N_U - VQDMLSDHXQ_S VQDMLSDHQ_S VQDMLADHXQ_S VQDMLADHQ_S - VMLSDAVAXQ_S VMLSDAVAQ_S VMLADAVAXQ_S - VCMPGEQ_M_F VCMPGTQ_M_N_F VMLSLDAVQ_P_S VRMLALDAVHAXQ_S - VMLSLDAVXQ_P_S VFMAQ_F VMLSLDAVAQ_S VQSHRUNBQ_N_S - VQRSHRUNTQ_N_S VCMLAQ_F VMINNMAQ_M_F VFMASQ_N_F - VDUPQ_M_N_F VCMPGTQ_M_F VCMPLTQ_M_F VRMLSLDAVHQ_P_S - VQSHRUNTQ_N_S VABSQ_M_F VMAXNMAVQ_P_F VFMAQ_N_F - VRMLSLDAVHXQ_P_S VREV32Q_M_F VRMLSLDAVHAQ_S - VRMLSLDAVHAXQ_S VCMPLTQ_M_N_F VCMPNEQ_M_F VRNDAQ_M_F - VRNDPQ_M_F VADDLVAQ_P_S VQMOVUNBQ_M_S VCMPLEQ_M_F - VCMLAQ_ROT180_F VMLSLDAVAXQ_S VRNDXQ_M_F VFMSQ_F - VMINNMVQ_P_F VMAXNMVQ_P_F VPSELQ_F VCMLAQ_ROT90_F - VQMOVUNTQ_M_S VREV64Q_M_F VNEGQ_M_F VRNDMQ_M_F - VCMPLEQ_M_N_F VCMPGEQ_M_N_F VRNDNQ_M_F VMINNMAVQ_P_F - VCMPNEQ_M_N_F VRMLALDAVHQ_P_S VRMLALDAVHXQ_P_S - VCMPEQQ_M_N_F VCMLAQ_ROT270_F VMAXNMAQ_M_F VRNDQ_M_F - VMLALDAVQ_P_U VMLALDAVQ_P_S VQMOVNBQ_M_S VQMOVNBQ_M_U - VMOVLTQ_M_U VMOVLTQ_M_S VMOVNBQ_M_U VMOVNBQ_M_S - VRSHRNTQ_N_U VRSHRNTQ_N_S VORRQ_M_N_S VORRQ_M_N_U - VREV32Q_M_S VREV32Q_M_U VQRSHRNTQ_N_U VQRSHRNTQ_N_S - VMOVNTQ_M_U VMOVNTQ_M_S VMOVLBQ_M_U VMOVLBQ_M_S - VMLALDAVAQ_S VMLALDAVAQ_U VQSHRNBQ_N_U VQSHRNBQ_N_S - VSHRNBQ_N_U VSHRNBQ_N_S VRSHRNBQ_N_S VRSHRNBQ_N_U - VMLALDAVXQ_P_U VMLALDAVXQ_P_S VQMOVNTQ_M_U VQMOVNTQ_M_S - VMVNQ_M_N_U VMVNQ_M_N_S VQSHRNTQ_N_U VQSHRNTQ_N_S - VMLALDAVAXQ_S VMLALDAVAXQ_U VSHRNTQ_N_S VSHRNTQ_N_U - VCVTBQ_M_F16_F32 VCVTBQ_M_F32_F16 VCVTTQ_M_F16_F32 - VCVTTQ_M_F32_F16 VCVTMQ_M_S VCVTMQ_M_U VCVTNQ_M_S - VCVTPQ_M_S VCVTPQ_M_U VCVTQ_M_N_FROM_F_S VCVTNQ_M_U - VREV16Q_M_S VREV16Q_M_U VREV32Q_M VCVTQ_M_FROM_F_U - VCVTQ_M_FROM_F_S VRMLALDAVHQ_P_U VADDLVAQ_P_U - VCVTQ_M_N_FROM_F_U VQSHLUQ_M_N_S VABAVQ_P_S - VABAVQ_P_U VSHLQ_M_S VSHLQ_M_U VSRIQ_M_N_S - VSRIQ_M_N_U VSUBQ_M_U VSUBQ_M_S VCVTQ_M_N_TO_F_U - VCVTQ_M_N_TO_F_S VQADDQ_M_U VQADDQ_M_S - VRSHRQ_M_N_S VSUBQ_M_N_S VSUBQ_M_N_U VBRSRQ_M_N_S - VSUBQ_M_N_F VBICQ_M_F VHADDQ_M_U VBICQ_M_U VBICQ_M_S - VMULQ_M_N_U VHADDQ_M_S VORNQ_M_F VMLAQ_M_N_S VQSUBQ_M_U - VQSUBQ_M_S VMLAQ_M_N_U VQSUBQ_M_N_U VQSUBQ_M_N_S - VMULLTQ_INT_M_S VMULLTQ_INT_M_U VMULQ_M_N_S VMULQ_M_N_F - VMLASQ_M_N_U VMLASQ_M_N_S VMAXQ_M_U VQRDMLAHQ_M_N_U - VCADDQ_ROT270_M_F VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S - VQRSHLQ_M_S VMULQ_M_F VRHADDQ_M_U VSHRQ_M_N_U - VRHADDQ_M_S VMULQ_M_S VMULQ_M_U VQRDMLASHQ_M_N_S - VRSHLQ_M_S 
VRSHLQ_M_U VRSHRQ_M_N_U VADDQ_M_N_F - VADDQ_M_N_S VADDQ_M_N_U VQRDMLASHQ_M_N_U VMAXQ_M_S - VQRDMLAHQ_M_N_S VORRQ_M_S VORRQ_M_U VORRQ_M_F - VQRSHLQ_M_U VRMULHQ_M_U VRMULHQ_M_S VMINQ_M_S VMINQ_M_U - VANDQ_M_F VANDQ_M_U VANDQ_M_S VHSUBQ_M_N_S VHSUBQ_M_N_U - VMULHQ_M_S VMULHQ_M_U VMULLBQ_INT_M_U - VMULLBQ_INT_M_S VCADDQ_ROT90_M_F - VSHRQ_M_N_S VADDQ_M_U VSLIQ_M_N_U - VQADDQ_M_N_S VBRSRQ_M_N_F VABDQ_M_F VBRSRQ_M_N_U - VEORQ_M_F VSHLQ_M_N_S VQDMLAHQ_M_N_U VQDMLAHQ_M_N_S - VSHLQ_M_N_U VMLADAVAQ_P_U VMLADAVAQ_P_S VSLIQ_M_N_S - VQSHLQ_M_U VQSHLQ_M_S VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S - VORNQ_M_U VORNQ_M_S VQSHLQ_M_N_S VQSHLQ_M_N_U VADDQ_M_S - VHADDQ_M_N_S VADDQ_M_F VQADDQ_M_N_U VEORQ_M_S VEORQ_M_U - VHSUBQ_M_S VHSUBQ_M_U VHADDQ_M_N_U VHCADDQ_ROT90_M_S - VQRDMLSDHQ_M_S VQRDMLSDHXQ_M_S VQRDMLADHXQ_M_S - VQDMULHQ_M_S VMLADAVAXQ_P_S VQDMLADHXQ_M_S - VQRDMULHQ_M_S VMLSDAVAXQ_P_S VQDMULHQ_M_N_S - VHCADDQ_ROT270_M_S VQDMLSDHQ_M_S VQDMLSDHXQ_M_S - VMLSDAVAQ_P_S VQRDMLADHQ_M_S VQDMLADHQ_M_S - VMLALDAVAQ_P_U VMLALDAVAQ_P_S VMLALDAVAXQ_P_U - VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S VQRSHRNTQ_M_N_S - VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S VQSHRNTQ_M_N_S - VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S VRSHRNTQ_M_N_U - VSHLLBQ_M_N_U VSHLLBQ_M_N_S VSHLLTQ_M_N_U VSHLLTQ_M_N_S - VSHRNBQ_M_N_S VSHRNBQ_M_N_U VSHRNTQ_M_N_S VSHRNTQ_M_N_U - VMLALDAVAXQ_P_S VQRSHRNTQ_M_N_U VQSHRNTQ_M_N_U - VRSHRNTQ_M_N_S VQRDMULHQ_M_N_S VRMLALDAVHAQ_P_S - VMLSLDAVAQ_P_S VMLSLDAVAXQ_P_S VMULLBQ_POLY_M_P - VMULLTQ_POLY_M_P VQDMULLBQ_M_N_S VQDMULLBQ_M_S - VQDMULLTQ_M_N_S VQDMULLTQ_M_S VQRSHRUNBQ_M_N_S - VQRSHRUNTQ_M_N_SVQSHRUNBQ_M_N_S VQSHRUNTQ_M_N_S - VRMLALDAVHAQ_P_U VRMLALDAVHAXQ_P_S VRMLSLDAVHAQ_P_S - VRMLSLDAVHAXQ_P_S VQRSHRUNTQ_M_N_S VQSHRUNBQ_M_N_S - VCMLAQ_M_F VCMLAQ_ROT180_M_F VCMLAQ_ROT270_M_F - VCMLAQ_ROT90_M_F VCMULQ_M_F VCMULQ_ROT180_M_F - VCMULQ_ROT270_M_F VCMULQ_ROT90_M_F VFMAQ_M_F - VFMAQ_M_N_F VFMASQ_M_N_F VFMSQ_M_F VMAXNMQ_M_F - VMINNMQ_M_F VSUBQ_M_F VSTRWQSB_S VSTRWQSB_U - VSTRBQSO_S VSTRBQSO_U VSTRBQ_S VSTRBQ_U VLDRBQGO_S - VLDRBQGO_U VLDRBQ_S VLDRBQ_U VLDRWQGB_S VLDRWQGB_U - VLD1Q_F VLD1Q_S VLD1Q_U VLDRHQ_F VLDRHQGO_S - VLDRHQGO_U VLDRHQGSO_S VLDRHQGSO_U VLDRHQ_S VLDRHQ_U - VLDRWQ_F VLDRWQ_S VLDRWQ_U VLDRDQGB_S VLDRDQGB_U - VLDRDQGO_S VLDRDQGO_U VLDRDQGSO_S VLDRDQGSO_U - VLDRHQGO_F VLDRHQGSO_F VLDRWQGB_F VLDRWQGO_F - VLDRWQGO_S VLDRWQGO_U VLDRWQGSO_F VLDRWQGSO_S - VLDRWQGSO_U VSTRHQ_F VST1Q_S VST1Q_U VSTRHQSO_S - VSTRHQSO_U VSTRHQSSO_S VSTRHQSSO_U VSTRHQ_S - VSTRHQ_U VSTRWQ_S VSTRWQ_U VSTRWQ_F VST1Q_F VSTRDQSB_S - VSTRDQSB_U VSTRDQSO_S VSTRDQSO_U VSTRDQSSO_S - VSTRDQSSO_U VSTRWQSO_S VSTRWQSO_U VSTRWQSSO_S - VSTRWQSSO_U VSTRHQSO_F VSTRHQSSO_F VSTRWQSB_F - VSTRWQSO_F VSTRWQSSO_F VDDUPQ VDDUPQ_M VDWDUPQ - VDWDUPQ_M VIDUPQ VIDUPQ_M VIWDUPQ VIWDUPQ_M - VSTRWQSBWB_S VSTRWQSBWB_U VLDRWQGBWB_S VLDRWQGBWB_U - VSTRWQSBWB_F VLDRWQGBWB_F VSTRDQSBWB_S VSTRDQSBWB_U - VLDRDQGBWB_S VLDRDQGBWB_U VADCQ_U VADCQ_M_U VADCQ_S - VADCQ_M_S VSBCIQ_U VSBCIQ_S VSBCIQ_M_U VSBCIQ_M_S - VSBCQ_U VSBCQ_S VSBCQ_M_U VSBCQ_M_S VADCIQ_U VADCIQ_M_U - VADCIQ_S VADCIQ_M_S VLD2Q VLD4Q VST2Q SRSHRL SRSHR - URSHR URSHRL SQRSHR UQRSHL UQRSHLL_64 VSHLCQ_M_U - UQRSHLL_48 SQRSHRL_64 SQRSHRL_48 VSHLCQ_M_S]) - -(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI") - (V4SF "V4SI")]) - -(define_int_attr supf [(VCVTQ_TO_F_S "s") (VCVTQ_TO_F_U "u") (VREV16Q_S "s") - (VREV16Q_U "u") (VMVNQ_N_S "s") (VMVNQ_N_U "u") - (VCVTAQ_U "u") (VCVTAQ_S "s") (VREV64Q_S "s") - (VREV64Q_U "u") (VMVNQ_S "s") (VMVNQ_U "u") - (VDUPQ_N_U "u") (VDUPQ_N_S"s") (VADDVQ_S "s") - (VADDVQ_U "u") (VADDVQ_S "s") 
(VADDVQ_U "u") - (VMOVLTQ_U "u") (VMOVLTQ_S "s") (VMOVLBQ_S "s") - (VMOVLBQ_U "u") (VCVTQ_FROM_F_S "s") (VCVTQ_FROM_F_U "u") - (VCVTPQ_S "s") (VCVTPQ_U "u") (VCVTNQ_S "s") - (VCVTNQ_U "u") (VCVTMQ_S "s") (VCVTMQ_U "u") - (VCLZQ_U "u") (VCLZQ_S "s") (VREV32Q_U "u") - (VREV32Q_S "s") (VADDLVQ_U "u") (VADDLVQ_S "s") - (VCVTQ_N_TO_F_S "s") (VCVTQ_N_TO_F_U "u") - (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") - (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u") - (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s") - (VADDLVQ_P_U "u") (VCMPNEQ_U "u") (VCMPNEQ_S "s") - (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") - (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") - (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VANDQ_S "s") - (VANDQ_U "u") (VBICQ_S "s") (VBICQ_U "u") - (VBRSRQ_N_S "s") (VBRSRQ_N_U "u") (VCADDQ_ROT270_S "s") - (VCADDQ_ROT270_U "u") (VCADDQ_ROT90_S "s") - (VCMPEQQ_S "s") (VCMPEQQ_U "u") (VCADDQ_ROT90_U "u") - (VCMPEQQ_N_S "s") (VCMPEQQ_N_U "u") (VCMPNEQ_N_S "s") - (VCMPNEQ_N_U "u") (VEORQ_S "s") (VEORQ_U "u") - (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") - (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u") - (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u") - (VMAXVQ_S "s") (VMAXVQ_U "u") (VMINQ_S "s") (VMINQ_U "u") - (VMINVQ_S "s") (VMINVQ_U "u") (VMLADAVQ_S "s") - (VMLADAVQ_U "u") (VMULHQ_S "s") (VMULHQ_U "u") - (VMULLBQ_INT_S "s") (VMULLBQ_INT_U "u") (VQADDQ_S "s") - (VMULLTQ_INT_S "s") (VMULLTQ_INT_U "u") (VQADDQ_U "u") - (VMULQ_N_S "s") (VMULQ_N_U "u") (VMULQ_S "s") - (VMULQ_U "u") (VORNQ_S "s") (VORNQ_U "u") (VORRQ_S "s") - (VORRQ_U "u") (VQADDQ_N_S "s") (VQADDQ_N_U "u") - (VQRSHLQ_N_S "s") (VQRSHLQ_N_U "u") (VQRSHLQ_S "s") - (VQRSHLQ_U "u") (VQSHLQ_N_S "s") (VQSHLQ_N_U "u") - (VQSHLQ_R_S "s") (VQSHLQ_R_U "u") (VQSHLQ_S "s") - (VQSHLQ_U "u") (VQSUBQ_N_S "s") (VQSUBQ_N_U "u") - (VQSUBQ_S "s") (VQSUBQ_U "u") (VRHADDQ_S "s") - (VRHADDQ_U "u") (VRMULHQ_S "s") (VRMULHQ_U "u") - (VRSHLQ_N_S "s") (VRSHLQ_N_U "u") (VRSHLQ_S "s") - (VRSHLQ_U "u") (VRSHRQ_N_S "s") (VRSHRQ_N_U "u") - (VSHLQ_N_S "s") (VSHLQ_N_U "u") (VSHLQ_R_S "s") - (VSHLQ_R_U "u") (VSUBQ_N_S "s") (VSUBQ_N_U "u") - (VSUBQ_S "s") (VSUBQ_U "u") (VADDVAQ_S "s") - (VADDVAQ_U "u") (VADDLVAQ_S "s") (VADDLVAQ_U "u") - (VBICQ_N_S "s") (VBICQ_N_U "u") (VMLALDAVQ_U "u") - (VMLALDAVQ_S "s") (VMLALDAVXQ_U "u") (VMLALDAVXQ_S "s") - (VMOVNBQ_U "u") (VMOVNBQ_S "s") (VMOVNTQ_U "u") - (VMOVNTQ_S "s") (VORRQ_N_S "s") (VORRQ_N_U "u") - (VQMOVNBQ_U "u") (VQMOVNBQ_S "s") (VQMOVNTQ_S "s") - (VQMOVNTQ_U "u") (VSHLLBQ_N_U "u") (VSHLLBQ_N_S "s") - (VSHLLTQ_N_U "u") (VSHLLTQ_N_S "s") (VRMLALDAVHQ_U "u") - (VRMLALDAVHQ_S "s") (VBICQ_M_N_S "s") (VBICQ_M_N_U "u") - (VCVTAQ_M_S "s") (VCVTAQ_M_U "u") (VCVTQ_M_TO_F_S "s") - (VCVTQ_M_TO_F_U "u") (VQRSHRNBQ_N_S "s") - (VQRSHRNBQ_N_U "u") (VABAVQ_S "s") (VABAVQ_U "u") - (VRMLALDAVHAQ_U "u") (VRMLALDAVHAQ_S "s") (VSHLCQ_S "s") - (VSHLCQ_U "u") (VADDVAQ_P_S "s") (VADDVAQ_P_U "u") - (VCLZQ_M_S "s") (VCLZQ_M_U "u") (VCMPEQQ_M_N_S "s") - (VCMPEQQ_M_N_U "u") (VCMPEQQ_M_S "s") (VCMPEQQ_M_U "u") - (VCMPNEQ_M_N_S "s") (VCMPNEQ_M_N_U "u") (VCMPNEQ_M_S "s") - (VCMPNEQ_M_U "u") (VDUPQ_M_N_S "s") (VDUPQ_M_N_U "u") - (VMAXVQ_P_S "s") (VMAXVQ_P_U "u") (VMINVQ_P_S "s") - (VMINVQ_P_U "u") (VMLADAVAQ_S "s") (VMLADAVAQ_U "u") - (VMLADAVQ_P_S "s") (VMLADAVQ_P_U "u") (VMLAQ_N_S "s") - (VMLAQ_N_U "u") (VMLASQ_N_S "s") (VMLASQ_N_U "u") - (VMVNQ_M_S "s") (VMVNQ_M_U "u") (VPSELQ_S "s") - (VPSELQ_U "u") (VQDMLAHQ_N_S "s") (VQDMLAHQ_N_U "u") - (VQRDMLAHQ_N_S "s") (VQRDMLAHQ_N_U "u") - (VQRDMLASHQ_N_S "s") (VQRDMLASHQ_N_U "u") 
- (VQRSHLQ_M_N_S "s") (VQRSHLQ_M_N_U "u") - (VQSHLQ_M_R_S "s") (VQSHLQ_M_R_U "u") (VSRIQ_N_S "s") - (VREV64Q_M_S "s") (VREV64Q_M_U "u") (VSRIQ_N_U "u") - (VRSHLQ_M_N_S "s") (VRSHLQ_M_N_U "u") (VSHLQ_M_R_S "s") - (VSHLQ_M_R_U "u") (VSLIQ_N_S "s") (VSLIQ_N_U "u") - (VMLALDAVQ_P_S "s") (VQMOVNBQ_M_S "s") (VMOVLTQ_M_S "s") - (VMOVNBQ_M_S "s") (VRSHRNTQ_N_S "s") (VORRQ_M_N_S "s") - (VREV32Q_M_S "s") (VQRSHRNTQ_N_S "s") (VMOVNTQ_M_S "s") - (VMOVLBQ_M_S "s") (VMLALDAVAQ_S "s") (VQSHRNBQ_N_S "s") - (VSHRNBQ_N_S "s") (VRSHRNBQ_N_S "s") (VMLALDAVXQ_P_S "s") - (VQMOVNTQ_M_S "s") (VMVNQ_M_N_S "s") (VQSHRNTQ_N_S "s") - (VMLALDAVAXQ_S "s") (VSHRNTQ_N_S "s") (VMLALDAVQ_P_U "u") - (VQMOVNBQ_M_U "u") (VMOVLTQ_M_U "u") (VMOVNBQ_M_U "u") - (VRSHRNTQ_N_U "u") (VORRQ_M_N_U "u") (VREV32Q_M_U "u") - (VREV16Q_M_S "s") (VREV16Q_M_U "u") - (VQRSHRNTQ_N_U "u") (VMOVNTQ_M_U "u") (VMOVLBQ_M_U "u") - (VMLALDAVAQ_U "u") (VQSHRNBQ_N_U "u") (VSHRNBQ_N_U "u") - (VRSHRNBQ_N_U "u") (VMLALDAVXQ_P_U "u") - (VMVNQ_M_N_U "u") (VQSHRNTQ_N_U "u") (VMLALDAVAXQ_U "u") - (VQMOVNTQ_M_U "u") (VSHRNTQ_N_U "u") (VCVTMQ_M_S "s") - (VCVTMQ_M_U "u") (VCVTNQ_M_S "s") (VCVTNQ_M_U "u") - (VCVTPQ_M_S "s") (VCVTPQ_M_U "u") (VADDLVAQ_P_S "s") - (VCVTQ_M_N_FROM_F_U "u") (VCVTQ_M_FROM_F_S "s") - (VCVTQ_M_FROM_F_U "u") (VRMLALDAVHQ_P_U "u") - (VRMLALDAVHQ_P_S "s") (VADDLVAQ_P_U "u") - (VCVTQ_M_N_FROM_F_S "s") (VABAVQ_P_U "u") - (VABAVQ_P_S "s") (VSHLQ_M_S "s") (VSHLQ_M_U "u") - (VSRIQ_M_N_S "s") (VSRIQ_M_N_U "u") (VSUBQ_M_S "s") - (VSUBQ_M_U "u") (VCVTQ_M_N_TO_F_S "s") - (VCVTQ_M_N_TO_F_U "u") (VADDQ_M_N_U "u") - (VSHLQ_M_N_S "s") (VMAXQ_M_U "u") (VHSUBQ_M_N_U "u") - (VMULQ_M_N_S "s") (VQSHLQ_M_U "u") (VRHADDQ_M_S "s") - (VEORQ_M_U "u") (VSHRQ_M_N_U "u") (VCADDQ_ROT90_M_U "u") - (VMLADAVAQ_P_U "u") (VEORQ_M_S "s") (VBRSRQ_M_N_S "s") - (VMULQ_M_U "u") (VQRDMLAHQ_M_N_S "s") (VHSUBQ_M_N_S "s") - (VQRSHLQ_M_S "s") (VMULQ_M_N_U "u") - (VMULQ_M_S "s") (VQSHLQ_M_N_U "u") (VSLIQ_M_N_U "u") - (VMLADAVAQ_P_S "s") (VQRSHLQ_M_U "u") - (VMULLBQ_INT_M_U "u") (VSHLQ_M_N_U "u") (VQSUBQ_M_U "u") - (VQRDMLASHQ_M_N_U "u") (VRSHRQ_M_N_S "s") - (VORNQ_M_S "s") (VCADDQ_ROT270_M_S "s") (VRHADDQ_M_U "u") - (VRSHRQ_M_N_U "u") (VMLASQ_M_N_U "u") (VHSUBQ_M_U "u") - (VQSUBQ_M_N_S "s") (VMULLTQ_INT_M_S "s") - (VORRQ_M_S "s") (VQDMLAHQ_M_N_U "u") (VRSHLQ_M_S "s") - (VHADDQ_M_U "u") (VHADDQ_M_N_S "s") (VMULLTQ_INT_M_U "u") - (VORRQ_M_U "u") (VHADDQ_M_S "s") (VHADDQ_M_N_U "u") - (VQDMLAHQ_M_N_S "s") (VMAXQ_M_S "s") (VORNQ_M_U "u") - (VCADDQ_ROT270_M_U "u") (VQADDQ_M_U "u") - (VQRDMLASHQ_M_N_S "s") (VBICQ_M_U "u") (VMINQ_M_U "u") - (VSUBQ_M_N_S "s") (VMULLBQ_INT_M_S "s") (VQSUBQ_M_S "s") - (VCADDQ_ROT90_M_S "s") (VRMULHQ_M_S "s") (VANDQ_M_U "u") - (VMULHQ_M_S "s") (VADDQ_M_S "s") (VQRDMLAHQ_M_N_U "u") - (VMLASQ_M_N_S "s") (VHSUBQ_M_S "s") (VRMULHQ_M_U "u") - (VQADDQ_M_N_S "s") (VSHRQ_M_N_S "s") (VANDQ_M_S "s") - (VABDQ_M_U "u") (VQSHLQ_M_S "s") (VABDQ_M_S "s") - (VSUBQ_M_N_U "u") (VMLAQ_M_N_S "s") (VBRSRQ_M_N_U "u") - (VADDQ_M_U "u") (VRSHLQ_M_U "u") (VSLIQ_M_N_S "s") - (VQADDQ_M_N_U "u") (VADDQ_M_N_S "s") (VQSUBQ_M_N_U "u") - (VMLAQ_M_N_U "u") (VMINQ_M_S "s") (VMULHQ_M_U "u") - (VQADDQ_M_S "s") (VBICQ_M_S "s") (VQSHLQ_M_N_S "s") - (VQSHRNTQ_M_N_S "s") (VQSHRNTQ_M_N_U "u") - (VSHRNTQ_M_N_U "u") (VSHRNTQ_M_N_S "s") - (VSHRNBQ_M_N_S "s") (VSHRNBQ_M_N_U "u") - (VSHLLTQ_M_N_S "s") (VSHLLTQ_M_N_U "u") - (VSHLLBQ_M_N_S "s") (VSHLLBQ_M_N_U "u") - (VRSHRNTQ_M_N_S "s") (VRSHRNTQ_M_N_U "u") - (VRSHRNBQ_M_N_U "u") (VRSHRNBQ_M_N_S "s") - (VQSHRNTQ_M_N_U "u") (VQSHRNTQ_M_N_S "s") - 
(VQSHRNBQ_M_N_S "s") (VQSHRNBQ_M_N_U "u") - (VQRSHRNTQ_M_N_S "s") (VQRSHRNTQ_M_N_U "u") - (VQRSHRNBQ_M_N_S "s") (VQRSHRNBQ_M_N_U "u") - (VMLALDAVAXQ_P_S "s") (VMLALDAVAXQ_P_U "u") - (VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u") - (VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s") - (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u") - (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s") - (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u") - (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s") - (VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u") - (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s") - (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u") - (VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s") - (VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u") - (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s") - (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u") - (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s") - (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u") - (VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s") - (VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u") - (VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u") - (VSTRWQSSO_S "s") (VSTRWQSBWB_S "s") (VSTRWQSBWB_U "u") - (VLDRWQGBWB_S "s") (VLDRWQGBWB_U "u") (VLDRDQGBWB_S "s") - (VLDRDQGBWB_U "u") (VSTRDQSBWB_S "s") (VADCQ_M_S "s") - (VSTRDQSBWB_U "u") (VSBCQ_U "u") (VSBCQ_M_U "u") - (VSBCQ_S "s") (VSBCQ_M_S "s") (VSBCIQ_U "u") - (VSBCIQ_M_U "u") (VSBCIQ_S "s") (VSBCIQ_M_S "s") - (VADCQ_U "u") (VADCQ_M_U "u") (VADCQ_S "s") - (VADCIQ_U "u") (VADCIQ_M_U "u") (VADCIQ_S "s") - (VADCIQ_M_S "s") (SQRSHRL_64 "64") (SQRSHRL_48 "48") - (UQRSHLL_64 "64") (UQRSHLL_48 "48") (VSHLCQ_M_S "s") - (VSHLCQ_M_U "u")]) - -(define_int_attr mode1 [(VCTP8Q "8") (VCTP16Q "16") (VCTP32Q "32") - (VCTP64Q "64") (VCTP8Q_M "8") (VCTP16Q_M "16") - (VCTP32Q_M "32") (VCTP64Q_M "64")]) -(define_mode_attr MVE_pred2 [(V16QI "mve_imm_8") (V8HI "mve_imm_16") - (V4SI "mve_imm_32") - (V8HF "mve_imm_16") (V4SF "mve_imm_32")]) -(define_mode_attr MVE_constraint2 [(V16QI "Rb") (V8HI "Rd") (V4SI "Rf") - (V8HF "Rd") (V4SF "Rf")]) -(define_mode_attr MVE_LANES [(V16QI "16") (V8HI "8") (V4SI "4")]) -(define_mode_attr MVE_constraint [ (V16QI "Ra") (V8HI "Rc") (V4SI "Re")]) -(define_mode_attr MVE_pred [ (V16QI "mve_imm_7") (V8HI "mve_imm_15") - (V4SI "mve_imm_31")]) -(define_mode_attr MVE_constraint3 [ (V8HI "Rb") (V4SI "Rd")]) -(define_mode_attr MVE_pred3 [ (V8HI "mve_imm_8") (V4SI "mve_imm_16")]) -(define_mode_attr MVE_constraint1 [ (V8HI "Ra") (V4SI "Rc")]) -(define_mode_attr MVE_pred1 [ (V8HI "mve_imm_7") (V4SI "mve_imm_15")]) -(define_mode_attr MVE_B_ELEM [ (V16QI "V16QI") (V8HI "V8QI") (V4SI "V4QI")]) -(define_mode_attr MVE_H_ELEM [ (V8HI "V8HI") (V4SI "V4HI")]) -(define_mode_attr V_sz_elem1 [(V16QI "b") (V8HI "h") (V4SI "w") (V8HF "h") - (V4SF "w")]) -(define_mode_attr V_extr_elem [(V16QI "u8") (V8HI "u16") (V4SI "32") - (V8HF "u16") (V4SF "32")]) - -(define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") - (V8HF "=w") (V4SF "=&w")]) - -(define_int_iterator VCVTQ_TO_F [VCVTQ_TO_F_S VCVTQ_TO_F_U]) -(define_int_iterator VMVNQ_N [VMVNQ_N_U VMVNQ_N_S]) -(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U]) -(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U]) -(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S]) -(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S]) -(define_int_iterator VMVNQ [VMVNQ_U VMVNQ_S]) -(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S]) -(define_int_iterator VCLZQ [VCLZQ_U VCLZQ_S]) -(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S]) -(define_int_iterator VREV32Q 
[VREV32Q_U VREV32Q_S]) -(define_int_iterator VMOVLBQ [VMOVLBQ_S VMOVLBQ_U]) -(define_int_iterator VMOVLTQ [VMOVLTQ_U VMOVLTQ_S]) -(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U]) -(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U]) -(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U]) -(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S]) -(define_int_iterator VCTPQ [VCTP8Q VCTP16Q VCTP32Q VCTP64Q]) -(define_int_iterator VCTPQ_M [VCTP8Q_M VCTP16Q_M VCTP32Q_M VCTP64Q_M]) -(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U]) -(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S]) -(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) -(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) -(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U]) -(define_int_iterator VCMPNEQ [VCMPNEQ_U VCMPNEQ_S]) -(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U]) -(define_int_iterator VABDQ [VABDQ_S VABDQ_U]) -(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) -(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) -(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) -(define_int_iterator VANDQ [VANDQ_U VANDQ_S]) -(define_int_iterator VBICQ [VBICQ_S VBICQ_U]) -(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCADDQ_ROT270 [VCADDQ_ROT270_S VCADDQ_ROT270_U]) -(define_int_iterator VCADDQ_ROT90 [VCADDQ_ROT90_U VCADDQ_ROT90_S]) -(define_int_iterator VCMPEQQ [VCMPEQQ_U VCMPEQQ_S]) -(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S VCMPEQQ_N_U]) -(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_U VCMPNEQ_N_S]) -(define_int_iterator VEORQ [VEORQ_U VEORQ_S]) -(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U]) -(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S]) -(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U]) -(define_int_iterator VHSUBQ_N [VHSUBQ_N_U VHSUBQ_N_S]) -(define_int_iterator VMAXQ [VMAXQ_U VMAXQ_S]) -(define_int_iterator VMAXVQ [VMAXVQ_U VMAXVQ_S]) -(define_int_iterator VMINQ [VMINQ_S VMINQ_U]) -(define_int_iterator VMINVQ [VMINVQ_U VMINVQ_S]) -(define_int_iterator VMLADAVQ [VMLADAVQ_U VMLADAVQ_S]) -(define_int_iterator VMULHQ [VMULHQ_S VMULHQ_U]) -(define_int_iterator VMULLBQ_INT [VMULLBQ_INT_U VMULLBQ_INT_S]) -(define_int_iterator VMULLTQ_INT [VMULLTQ_INT_U VMULLTQ_INT_S]) -(define_int_iterator VMULQ [VMULQ_U VMULQ_S]) -(define_int_iterator VMULQ_N [VMULQ_N_U VMULQ_N_S]) -(define_int_iterator VORNQ [VORNQ_U VORNQ_S]) -(define_int_iterator VORRQ [VORRQ_S VORRQ_U]) -(define_int_iterator VQADDQ [VQADDQ_U VQADDQ_S]) -(define_int_iterator VQADDQ_N [VQADDQ_N_S VQADDQ_N_U]) -(define_int_iterator VQRSHLQ [VQRSHLQ_S VQRSHLQ_U]) -(define_int_iterator VQRSHLQ_N [VQRSHLQ_N_S VQRSHLQ_N_U]) -(define_int_iterator VQSHLQ [VQSHLQ_S VQSHLQ_U]) -(define_int_iterator VQSHLQ_N [VQSHLQ_N_S VQSHLQ_N_U]) -(define_int_iterator VQSHLQ_R [VQSHLQ_R_U VQSHLQ_R_S]) -(define_int_iterator VQSUBQ [VQSUBQ_U VQSUBQ_S]) -(define_int_iterator VQSUBQ_N [VQSUBQ_N_S VQSUBQ_N_U]) -(define_int_iterator VRHADDQ [VRHADDQ_S VRHADDQ_U]) -(define_int_iterator VRMULHQ [VRMULHQ_S VRMULHQ_U]) -(define_int_iterator VRSHLQ [VRSHLQ_S VRSHLQ_U]) -(define_int_iterator VRSHLQ_N [VRSHLQ_N_U VRSHLQ_N_S]) -(define_int_iterator VRSHRQ_N [VRSHRQ_N_S VRSHRQ_N_U]) -(define_int_iterator VSHLQ_N [VSHLQ_N_U VSHLQ_N_S]) -(define_int_iterator VSHLQ_R [VSHLQ_R_S VSHLQ_R_U]) -(define_int_iterator VSUBQ [VSUBQ_S VSUBQ_U]) -(define_int_iterator VSUBQ_N [VSUBQ_N_S VSUBQ_N_U]) -(define_int_iterator VADDLVAQ [VADDLVAQ_S VADDLVAQ_U]) -(define_int_iterator VBICQ_N [VBICQ_N_S VBICQ_N_U]) -(define_int_iterator VMLALDAVQ [VMLALDAVQ_U 
VMLALDAVQ_S]) -(define_int_iterator VMLALDAVXQ [VMLALDAVXQ_U VMLALDAVXQ_S]) -(define_int_iterator VMOVNBQ [VMOVNBQ_U VMOVNBQ_S]) -(define_int_iterator VMOVNTQ [VMOVNTQ_S VMOVNTQ_U]) -(define_int_iterator VORRQ_N [VORRQ_N_U VORRQ_N_S]) -(define_int_iterator VQMOVNBQ [VQMOVNBQ_U VQMOVNBQ_S]) -(define_int_iterator VQMOVNTQ [VQMOVNTQ_U VQMOVNTQ_S]) -(define_int_iterator VSHLLBQ_N [VSHLLBQ_N_S VSHLLBQ_N_U]) -(define_int_iterator VSHLLTQ_N [VSHLLTQ_N_U VSHLLTQ_N_S]) -(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S]) -(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U]) -(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U]) -(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U]) -(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S]) -(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U]) -(define_int_iterator VSHLCQ [VSHLCQ_S VSHLCQ_U]) -(define_int_iterator VRMLALDAVHAQ [VRMLALDAVHAQ_S VRMLALDAVHAQ_U]) -(define_int_iterator VADDVAQ_P [VADDVAQ_P_S VADDVAQ_P_U]) -(define_int_iterator VCLZQ_M [VCLZQ_M_S VCLZQ_M_U]) -(define_int_iterator VCMPEQQ_M_N [VCMPEQQ_M_N_S VCMPEQQ_M_N_U]) -(define_int_iterator VCMPEQQ_M [VCMPEQQ_M_S VCMPEQQ_M_U]) -(define_int_iterator VCMPNEQ_M_N [VCMPNEQ_M_N_S VCMPNEQ_M_N_U]) -(define_int_iterator VCMPNEQ_M [VCMPNEQ_M_S VCMPNEQ_M_U]) -(define_int_iterator VDUPQ_M_N [VDUPQ_M_N_S VDUPQ_M_N_U]) -(define_int_iterator VMAXVQ_P [VMAXVQ_P_S VMAXVQ_P_U]) -(define_int_iterator VMINVQ_P [VMINVQ_P_S VMINVQ_P_U]) -(define_int_iterator VMLADAVAQ [VMLADAVAQ_S VMLADAVAQ_U]) -(define_int_iterator VMLADAVQ_P [VMLADAVQ_P_S VMLADAVQ_P_U]) -(define_int_iterator VMLAQ_N [VMLAQ_N_S VMLAQ_N_U]) -(define_int_iterator VMLASQ_N [VMLASQ_N_S VMLASQ_N_U]) -(define_int_iterator VMVNQ_M [VMVNQ_M_S VMVNQ_M_U]) -(define_int_iterator VPSELQ [VPSELQ_S VPSELQ_U]) -(define_int_iterator VQDMLAHQ_N [VQDMLAHQ_N_S VQDMLAHQ_N_U]) -(define_int_iterator VQRDMLAHQ_N [VQRDMLAHQ_N_S VQRDMLAHQ_N_U]) -(define_int_iterator VQRDMLASHQ_N [VQRDMLASHQ_N_S VQRDMLASHQ_N_U]) -(define_int_iterator VQRSHLQ_M_N [VQRSHLQ_M_N_S VQRSHLQ_M_N_U]) -(define_int_iterator VQSHLQ_M_R [VQSHLQ_M_R_S VQSHLQ_M_R_U]) -(define_int_iterator VREV64Q_M [VREV64Q_M_S VREV64Q_M_U]) -(define_int_iterator VRSHLQ_M_N [VRSHLQ_M_N_S VRSHLQ_M_N_U]) -(define_int_iterator VSHLQ_M_R [VSHLQ_M_R_S VSHLQ_M_R_U]) -(define_int_iterator VSLIQ_N [VSLIQ_N_S VSLIQ_N_U]) -(define_int_iterator VSRIQ_N [VSRIQ_N_S VSRIQ_N_U]) -(define_int_iterator VMLALDAVQ_P [VMLALDAVQ_P_U VMLALDAVQ_P_S]) -(define_int_iterator VQMOVNBQ_M [VQMOVNBQ_M_S VQMOVNBQ_M_U]) -(define_int_iterator VMOVLTQ_M [VMOVLTQ_M_U VMOVLTQ_M_S]) -(define_int_iterator VMOVNBQ_M [VMOVNBQ_M_U VMOVNBQ_M_S]) -(define_int_iterator VRSHRNTQ_N [VRSHRNTQ_N_U VRSHRNTQ_N_S]) -(define_int_iterator VORRQ_M_N [VORRQ_M_N_S VORRQ_M_N_U]) -(define_int_iterator VREV32Q_M [VREV32Q_M_S VREV32Q_M_U]) -(define_int_iterator VREV16Q_M [VREV16Q_M_S VREV16Q_M_U]) -(define_int_iterator VQRSHRNTQ_N [VQRSHRNTQ_N_U VQRSHRNTQ_N_S]) -(define_int_iterator VMOVNTQ_M [VMOVNTQ_M_U VMOVNTQ_M_S]) -(define_int_iterator VMOVLBQ_M [VMOVLBQ_M_U VMOVLBQ_M_S]) -(define_int_iterator VMLALDAVAQ [VMLALDAVAQ_S VMLALDAVAQ_U]) -(define_int_iterator VQSHRNBQ_N [VQSHRNBQ_N_U VQSHRNBQ_N_S]) -(define_int_iterator VSHRNBQ_N [VSHRNBQ_N_U VSHRNBQ_N_S]) -(define_int_iterator VRSHRNBQ_N [VRSHRNBQ_N_S VRSHRNBQ_N_U]) -(define_int_iterator VMLALDAVXQ_P [VMLALDAVXQ_P_U VMLALDAVXQ_P_S]) -(define_int_iterator VQMOVNTQ_M [VQMOVNTQ_M_U VQMOVNTQ_M_S]) -(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S]) -(define_int_iterator VQSHRNTQ_N 
[VQSHRNTQ_N_U VQSHRNTQ_N_S]) -(define_int_iterator VMLALDAVAXQ [VMLALDAVAXQ_S VMLALDAVAXQ_U]) -(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U]) -(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U]) -(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U]) -(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U]) -(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U]) -(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S]) -(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U]) -(define_int_iterator VADDLVAQ_P [VADDLVAQ_P_U VADDLVAQ_P_S]) -(define_int_iterator VABAVQ_P [VABAVQ_P_S VABAVQ_P_U]) -(define_int_iterator VSHLQ_M [VSHLQ_M_S VSHLQ_M_U]) -(define_int_iterator VSRIQ_M_N [VSRIQ_M_N_S VSRIQ_M_N_U]) -(define_int_iterator VSUBQ_M [VSUBQ_M_U VSUBQ_M_S]) -(define_int_iterator VCVTQ_M_N_TO_F [VCVTQ_M_N_TO_F_U VCVTQ_M_N_TO_F_S]) -(define_int_iterator VHSUBQ_M [VHSUBQ_M_S VHSUBQ_M_U]) -(define_int_iterator VSLIQ_M_N [VSLIQ_M_N_U VSLIQ_M_N_S]) -(define_int_iterator VRSHLQ_M [VRSHLQ_M_S VRSHLQ_M_U]) -(define_int_iterator VMINQ_M [VMINQ_M_S VMINQ_M_U]) -(define_int_iterator VMULLBQ_INT_M [VMULLBQ_INT_M_U VMULLBQ_INT_M_S]) -(define_int_iterator VMULHQ_M [VMULHQ_M_S VMULHQ_M_U]) -(define_int_iterator VMULQ_M [VMULQ_M_S VMULQ_M_U]) -(define_int_iterator VHSUBQ_M_N [VHSUBQ_M_N_S VHSUBQ_M_N_U]) -(define_int_iterator VHADDQ_M_N [VHADDQ_M_N_S VHADDQ_M_N_U]) -(define_int_iterator VORRQ_M [VORRQ_M_S VORRQ_M_U]) -(define_int_iterator VRMULHQ_M [VRMULHQ_M_U VRMULHQ_M_S]) -(define_int_iterator VQADDQ_M [VQADDQ_M_U VQADDQ_M_S]) -(define_int_iterator VRSHRQ_M_N [VRSHRQ_M_N_S VRSHRQ_M_N_U]) -(define_int_iterator VQSUBQ_M_N [VQSUBQ_M_N_U VQSUBQ_M_N_S]) -(define_int_iterator VADDQ_M [VADDQ_M_U VADDQ_M_S]) -(define_int_iterator VORNQ_M [VORNQ_M_U VORNQ_M_S]) -(define_int_iterator VRHADDQ_M [VRHADDQ_M_U VRHADDQ_M_S]) -(define_int_iterator VQSHLQ_M [VQSHLQ_M_U VQSHLQ_M_S]) -(define_int_iterator VANDQ_M [VANDQ_M_U VANDQ_M_S]) -(define_int_iterator VBICQ_M [VBICQ_M_U VBICQ_M_S]) -(define_int_iterator VSHLQ_M_N [VSHLQ_M_N_S VSHLQ_M_N_U]) -(define_int_iterator VCADDQ_ROT270_M [VCADDQ_ROT270_M_U VCADDQ_ROT270_M_S]) -(define_int_iterator VQRSHLQ_M [VQRSHLQ_M_U VQRSHLQ_M_S]) -(define_int_iterator VQADDQ_M_N [VQADDQ_M_N_U VQADDQ_M_N_S]) -(define_int_iterator VADDQ_M_N [VADDQ_M_N_S VADDQ_M_N_U]) -(define_int_iterator VMAXQ_M [VMAXQ_M_S VMAXQ_M_U]) -(define_int_iterator VQSUBQ_M [VQSUBQ_M_U VQSUBQ_M_S]) -(define_int_iterator VMLASQ_M_N [VMLASQ_M_N_U VMLASQ_M_N_S]) -(define_int_iterator VMLADAVAQ_P [VMLADAVAQ_P_U VMLADAVAQ_P_S]) -(define_int_iterator VBRSRQ_M_N [VBRSRQ_M_N_U VBRSRQ_M_N_S]) -(define_int_iterator VMULQ_M_N [VMULQ_M_N_U VMULQ_M_N_S]) -(define_int_iterator VCADDQ_ROT90_M [VCADDQ_ROT90_M_U VCADDQ_ROT90_M_S]) -(define_int_iterator VMULLTQ_INT_M [VMULLTQ_INT_M_S VMULLTQ_INT_M_U]) -(define_int_iterator VEORQ_M [VEORQ_M_S VEORQ_M_U]) -(define_int_iterator VSHRQ_M_N [VSHRQ_M_N_S VSHRQ_M_N_U]) -(define_int_iterator VSUBQ_M_N [VSUBQ_M_N_S VSUBQ_M_N_U]) -(define_int_iterator VHADDQ_M [VHADDQ_M_S VHADDQ_M_U]) -(define_int_iterator VABDQ_M [VABDQ_M_S VABDQ_M_U]) -(define_int_iterator VMLAQ_M_N [VMLAQ_M_N_S VMLAQ_M_N_U]) -(define_int_iterator VQSHLQ_M_N [VQSHLQ_M_N_S VQSHLQ_M_N_U]) -(define_int_iterator VMLALDAVAQ_P [VMLALDAVAQ_P_U VMLALDAVAQ_P_S]) -(define_int_iterator VMLALDAVAXQ_P [VMLALDAVAXQ_P_U VMLALDAVAXQ_P_S]) -(define_int_iterator VQRSHRNBQ_M_N [VQRSHRNBQ_M_N_U VQRSHRNBQ_M_N_S]) -(define_int_iterator VQRSHRNTQ_M_N [VQRSHRNTQ_M_N_S VQRSHRNTQ_M_N_U]) 
-(define_int_iterator VQSHRNBQ_M_N [VQSHRNBQ_M_N_U VQSHRNBQ_M_N_S]) -(define_int_iterator VQSHRNTQ_M_N [VQSHRNTQ_M_N_S VQSHRNTQ_M_N_U]) -(define_int_iterator VRSHRNBQ_M_N [VRSHRNBQ_M_N_U VRSHRNBQ_M_N_S]) -(define_int_iterator VRSHRNTQ_M_N [VRSHRNTQ_M_N_U VRSHRNTQ_M_N_S]) -(define_int_iterator VSHLLBQ_M_N [VSHLLBQ_M_N_U VSHLLBQ_M_N_S]) -(define_int_iterator VSHLLTQ_M_N [VSHLLTQ_M_N_U VSHLLTQ_M_N_S]) -(define_int_iterator VSHRNBQ_M_N [VSHRNBQ_M_N_S VSHRNBQ_M_N_U]) -(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U]) -(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U]) -(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U]) -(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U]) -(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U]) -(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U]) -(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U]) -(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U]) -(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U]) -(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U]) -(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U]) -(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U]) -(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U]) -(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U]) -(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U]) -(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U]) -(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U]) -(define_int_iterator VST1Q [VST1Q_S VST1Q_U]) -(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U]) -(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U]) -(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U]) -(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U]) -(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U]) -(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U]) -(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U]) -(define_int_iterator VSTRWSOQ [VSTRWQSO_S VSTRWQSO_U]) -(define_int_iterator VSTRWSSOQ [VSTRWQSSO_S VSTRWQSSO_U]) -(define_int_iterator VSTRWSBWBQ [VSTRWQSBWB_S VSTRWQSBWB_U]) -(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U]) -(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U]) -(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U]) -(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S]) -(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S]) -(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S]) -(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S]) -(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S]) -(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S]) -(define_int_iterator VADCQ [VADCQ_U VADCQ_S]) -(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S]) -(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48]) -(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48]) -(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U]) - (define_insn "*mve_mov<mode>" [(set (match_operand:MVE_types 0 "nonimmediate_operand" "=w,w,r,w,w,r,w,Ux,w") (match_operand:MVE_types 1 "general_operand" "w,r,w,Dn,Uxi,r,Dm,w,Ul"))] @@ -1977,15 +1329,25 @@ ;; ;; [vmaxq_u, vmaxq_s]) ;; -(define_insn "mve_vmaxq_<supf><mode>" +(define_insn "mve_vmaxq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VMAXQ)) + (smax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vmax.%#<V_s_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +(define_insn "mve_vmaxq_u<mode>" + [ + (set 
(match_operand:MVE_2 0 "s_register_operand" "=w") + (umax:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" - "vmax.<supf>%#<V_sz_elem>\t%q0, %q1, %q2" + "vmax.%#<V_u_elem>\t%q0, %q1, %q2" [(set_attr "type" "mve_move") ]) @@ -2037,15 +1399,25 @@ ;; ;; [vminq_s, vminq_u]) ;; -(define_insn "mve_vminq_<supf><mode>" +(define_insn "mve_vminq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:MVE_2 2 "s_register_operand" "w")] - VMINQ)) + (smin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vmin.%#<V_s_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + +(define_insn "mve_vminq_u<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (umin:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" - "vmin.<supf>%#<V_sz_elem>\t%q0, %q1, %q2" + "vmin.%#<V_u_elem>\t%q0, %q1, %q2" [(set_attr "type" "mve_move") ]) @@ -2199,6 +1571,17 @@ [(set_attr "type" "mve_move") ]) +(define_insn "mve_vmulq<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (mult:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vmul.i%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + ;; ;; [vornq_u, vornq_s]) ;; @@ -2574,6 +1957,17 @@ [(set_attr "type" "mve_move") ]) +(define_insn "mve_vsubq<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (minus:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w") + (match_operand:MVE_2 2 "s_register_operand" "w"))) + ] + "TARGET_HAVE_MVE" + "vsub.i%#<V_sz_elem>\t%q0, %q1, %q2" + [(set_attr "type" "mve_move") +]) + ;; ;; [vabdq_f]) ;; @@ -3030,9 +2424,8 @@ (define_insn "mve_vmaxnmq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VMAXNMQ_F)) + (smax:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vmaxnm.f%#<V_sz_elem> %q0, %q1, %q2" @@ -3090,9 +2483,8 @@ (define_insn "mve_vminnmq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VMINNMQ_F)) + (smin:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vminnm.f%#<V_sz_elem> %q0, %q1, %q2" @@ -3210,9 +2602,8 @@ (define_insn "mve_vmulq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VMULQ_F)) + (mult:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vmul.f%#<V_sz_elem> %q0, %q1, %q2" @@ -3480,9 +2871,8 @@ (define_insn "mve_vsubq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") - (match_operand:MVE_0 2 "s_register_operand" "w")] - VSUBQ_F)) + (minus:MVE_0 
(match_operand:MVE_0 1 "s_register_operand" "w") + (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vsub.f%#<V_sz_elem>\t%q0, %q1, %q2" @@ -4310,7 +3700,7 @@ (set_attr "length""8")]) ;; -;; [vqdmlahq_n_s, vqdmlahq_n_u]) +;; [vqdmlahq_n_s]) ;; (define_insn "mve_vqdmlahq_n_<supf><mode>" [ @@ -4326,6 +3716,22 @@ ]) ;; +;; [vqdmlashq_n_s]) +;; +(define_insn "mve_vqdmlashq_n_<supf><mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") + (match_operand:MVE_2 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r")] + VQDMLASHQ_N)) + ] + "TARGET_HAVE_MVE" + "vqdmlash.s%#<V_sz_elem>\t%q0, %q2, %3" + [(set_attr "type" "mve_move") +]) + +;; ;; [vqnegq_m_s]) ;; (define_insn "mve_vqnegq_m_s<mode>" @@ -4374,7 +3780,7 @@ ]) ;; -;; [vqrdmlahq_n_s, vqrdmlahq_n_u]) +;; [vqrdmlahq_n_s]) ;; (define_insn "mve_vqrdmlahq_n_<supf><mode>" [ @@ -4390,7 +3796,7 @@ ]) ;; -;; [vqrdmlashq_n_s, vqrdmlashq_n_u]) +;; [vqrdmlashq_n_s]) ;; (define_insn "mve_vqrdmlashq_n_<supf><mode>" [ @@ -6552,6 +5958,23 @@ (set_attr "length""8")]) ;; +;; [vqdmlashq_m_n_s]) +;; +(define_insn "mve_vqdmlashq_m_n_s<mode>" + [ + (set (match_operand:MVE_2 0 "s_register_operand" "=w") + (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") + (match_operand:MVE_2 2 "s_register_operand" "w") + (match_operand:<V_elem> 3 "s_register_operand" "r") + (match_operand:HI 4 "vpr_register_operand" "Up")] + VQDMLASHQ_M_N_S)) + ] + "TARGET_HAVE_MVE" + "vpst\;vqdmlasht.s%#<V_sz_elem>\t%q0, %q2, %3" + [(set_attr "type" "mve_move") + (set_attr "length""8")]) + +;; ;; [vqrdmlahq_m_n_s]) ;; (define_insn "mve_vqrdmlahq_m_n_s<mode>" @@ -7113,7 +6536,7 @@ (set_attr "length""8")]) ;; -;; [vmlaldavaxq_p_u, vmlaldavaxq_p_s]) +;; [vmlaldavaxq_p_s]) ;; (define_insn "mve_vmlaldavaxq_p_<supf><mode>" [ @@ -10315,38 +9738,10 @@ [(set_attr "type" "mve_move") (set_attr "length""8")]) -(define_expand "mve_vstrwq_scatter_base_wb_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "w") - (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (ignore_wb, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vstrwq_scatter_base_wb_add_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "0") - (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_vec = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_<supf>v4si_insn (operands[0], operands[2], - operands[1], ignore_vec)); - DONE; -}) - ;; -;; [vstrwq_scatter_base_wb_s vstrdq_scatter_base_wb_u] +;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u] ;; -(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si_insn" +(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:V4SI 1 "s_register_operand" "0") @@ -10368,42 +9763,10 @@ } [(set_attr "length" "4")]) -(define_expand "mve_vstrwq_scatter_base_wb_p_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] 
VSTRWSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (ignore_wb, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vstrwq_scatter_base_wb_p_add_<supf>v4si" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "0") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_vec = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn (operands[0], operands[2], - operands[1], ignore_vec, - operands[3])); - DONE; -}) - ;; ;; [vstrwq_scatter_base_wb_p_s vstrwq_scatter_base_wb_p_u] ;; -(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn" +(define_insn "mve_vstrwq_scatter_base_wb_p_<supf>v4si" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:V4SI 1 "s_register_operand" "0") @@ -10426,38 +9789,10 @@ } [(set_attr "length" "8")]) -(define_expand "mve_vstrwq_scatter_base_wb_fv4sf" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SF 2 "s_register_operand" "w") - (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (ignore_wb,operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vstrwq_scatter_base_wb_add_fv4sf" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "0") - (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_vec = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_fv4sf_insn (operands[0], operands[2], - operands[1], ignore_vec)); - DONE; -}) - ;; ;; [vstrwq_scatter_base_wb_f] ;; -(define_insn "mve_vstrwq_scatter_base_wb_fv4sf_insn" +(define_insn "mve_vstrwq_scatter_base_wb_fv4sf" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:V4SI 1 "s_register_operand" "0") @@ -10479,42 +9814,10 @@ } [(set_attr "length" "4")]) -(define_expand "mve_vstrwq_scatter_base_wb_p_fv4sf" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_wb = gen_reg_rtx (V4SImode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (ignore_wb, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vstrwq_scatter_base_wb_p_add_fv4sf" - [(match_operand:V4SI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V4SI 2 "s_register_operand" "0") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V4SI [(const_int 0)] VSTRWQSBWB_F)] - "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" -{ - rtx ignore_vec = gen_reg_rtx (V4SFmode); - emit_insn ( - gen_mve_vstrwq_scatter_base_wb_p_fv4sf_insn (operands[0], operands[2], - operands[1], ignore_vec, - operands[3])); - DONE; -}) - ;; ;; [vstrwq_scatter_base_wb_p_f] ;; -(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf_insn" +(define_insn "mve_vstrwq_scatter_base_wb_p_fv4sf" [(set (mem:BLK (scratch)) (unspec:BLK 
[(match_operand:V4SI 1 "s_register_operand" "0") @@ -10537,38 +9840,10 @@ } [(set_attr "length" "8")]) -(define_expand "mve_vstrdq_scatter_base_wb_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V2DI 2 "s_register_operand" "w") - (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (ignore_wb, operands[0], - operands[1], operands[2])); - DONE; -}) - -(define_expand "mve_vstrdq_scatter_base_wb_add_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V2DI 2 "s_register_operand" "0") - (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_vec = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vstrdq_scatter_base_wb_<supf>v2di_insn (operands[0], operands[2], - operands[1], ignore_vec)); - DONE; -}) - ;; ;; [vstrdq_scatter_base_wb_s vstrdq_scatter_base_wb_u] ;; -(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di_insn" +(define_insn "mve_vstrdq_scatter_base_wb_<supf>v2di" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:V2DI 1 "s_register_operand" "0") @@ -10590,42 +9865,10 @@ } [(set_attr "length" "4")]) -(define_expand "mve_vstrdq_scatter_base_wb_p_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V2DI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_wb = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (ignore_wb, operands[0], - operands[1], operands[2], - operands[3])); - DONE; -}) - -(define_expand "mve_vstrdq_scatter_base_wb_p_add_<supf>v2di" - [(match_operand:V2DI 0 "s_register_operand" "=w") - (match_operand:SI 1 "mve_vldrd_immediate" "Ri") - (match_operand:V2DI 2 "s_register_operand" "0") - (match_operand:HI 3 "vpr_register_operand") - (unspec:V2DI [(const_int 0)] VSTRDSBWBQ)] - "TARGET_HAVE_MVE" -{ - rtx ignore_vec = gen_reg_rtx (V2DImode); - emit_insn ( - gen_mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn (operands[0], operands[2], - operands[1], ignore_vec, - operands[3])); - DONE; -}) - ;; ;; [vstrdq_scatter_base_wb_p_s vstrdq_scatter_base_wb_p_u] ;; -(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn" +(define_insn "mve_vstrdq_scatter_base_wb_p_<supf>v2di" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:V2DI 1 "s_register_operand" "0") @@ -10643,7 +9886,7 @@ ops[0] = operands[1]; ops[1] = operands[2]; ops[2] = operands[3]; - output_asm_insn ("vpst\;\tvstrdt.u64\t%q2, [%q0, %1]!",ops); + output_asm_insn ("vpst;vstrdt.u64\t%q2, [%q0, %1]!",ops); return ""; } [(set_attr "length" "8")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 3e7b51d..2d76769 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -501,7 +501,7 @@ [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") @@ -509,54 +509,11 @@ (const_string "neon_add<q>")))] ) -;; As with SFmode, full support for HFmode vector arithmetic is only 
available -;; when flag-unsafe-math-optimizations is enabled. - -;; Add pattern with modes V8HF and V4HF is split into separate patterns to add -;; support for standard pattern addv8hf3 in MVE. Following pattern is called -;; from "addv8hf3" standard pattern inside vec-common.md file. - -(define_insn "addv8hf3_neon" - [(set - (match_operand:V8HF 0 "s_register_operand" "=w") - (plus:V8HF - (match_operand:V8HF 1 "s_register_operand" "w") - (match_operand:V8HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "addv4hf3" - [(set - (match_operand:V4HF 0 "s_register_operand" "=w") - (plus:V4HF - (match_operand:V4HF 1 "s_register_operand" "w") - (match_operand:V4HF 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vadd.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_fp_addsub_s_q")] -) - -(define_insn "add<mode>3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (plus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set (attr "type") - (if_then_else (match_test "<Is_float_mode>") - (const_string "neon_fp_addsub_s<q>") - (const_string "neon_add<q>")))] -) - (define_insn "*sub<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") @@ -564,33 +521,11 @@ (const_string "neon_sub<q>")))] ) -(define_insn "sub<mode>3" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_sub<q>")] -) - -(define_insn "sub<mode>3_fp16" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (minus:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST" - "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_sub<q>")] -) - (define_insn "*mul<mode>3_neon" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w") (match_operand:VDQW 2 "s_register_operand" "w")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") @@ -635,7 +570,7 @@ (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") (match_operand:VDQW 3 "s_register_operand" "w")) (match_operand:VDQW 1 "s_register_operand" "0")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") @@ -648,7 +583,7 @@ (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") (match_operand:VH 3 "s_register_operand" "w")) (match_operand:VH 1 
"s_register_operand" "0")))] - "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set_attr "type" "neon_fp_mla_s<q>")] ) @@ -658,7 +593,7 @@ (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w") (match_operand:VDQW 3 "s_register_operand" "w"))))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "type") (if_then_else (match_test "<Is_float_mode>") @@ -676,7 +611,7 @@ (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") (match_operand:VCVTF 2 "register_operand" "w") (match_operand:VCVTF 3 "register_operand" "0")))] - "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA" "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_fp_mla_s<q>")] ) @@ -697,18 +632,7 @@ (match_operand:VH 1 "register_operand" "w") (match_operand:VH 2 "register_operand" "w") (match_operand:VH 3 "register_operand" "0")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_fp_mla_s<q>")] -) - -(define_insn "fma<VH:mode>4_intrinsic" - [(set (match_operand:VH 0 "register_operand" "=w") - (fma:VH - (match_operand:VH 1 "register_operand" "w") - (match_operand:VH 2 "register_operand" "w") - (match_operand:VH 3 "register_operand" "0")))] - "TARGET_NEON_FP16INST" + "ARM_HAVE_NEON_<MODE>_ARITH" "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_fp_mla_s<q>")] ) @@ -718,7 +642,7 @@ (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) (match_operand:VCVTF 2 "register_operand" "w") (match_operand:VCVTF 3 "register_operand" "0")))] - "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_<MODE>_ARITH && TARGET_FMA" "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_fp_mla_s<q>")] ) @@ -1238,7 +1162,7 @@ (parallel [(const_int 0) (const_int 1)])) (vec_select:V2SF (match_dup 1) (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_V4SF_ARITH" "<VQH_mnem>.f32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")] @@ -1305,7 +1229,7 @@ (define_expand "reduc_plus_scal_<mode>" [(match_operand:<V_elem> 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" { rtx vec = gen_reg_rtx (<MODE>mode); neon_pairwise_reduce (vec, operands[1], <MODE>mode, @@ -1318,8 +1242,7 @@ (define_expand "reduc_plus_scal_<mode>" [(match_operand:<V_elem> 0 "nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (<V_HALF>mode); @@ -1354,7 +1277,7 @@ (define_expand "reduc_smin_scal_<mode>" [(match_operand:<V_elem> 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" { rtx vec = gen_reg_rtx (<MODE>mode); @@ -1368,8 +1291,7 @@ (define_expand "reduc_smin_scal_<mode>" [(match_operand:<V_elem> 0 
"nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (<V_HALF>mode); @@ -1382,7 +1304,7 @@ (define_expand "reduc_smax_scal_<mode>" [(match_operand:<V_elem> 0 "nonimmediate_operand") (match_operand:VD 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" + "ARM_HAVE_NEON_<MODE>_ARITH" { rtx vec = gen_reg_rtx (<MODE>mode); neon_pairwise_reduce (vec, operands[1], <MODE>mode, @@ -1395,8 +1317,7 @@ (define_expand "reduc_smax_scal_<mode>" [(match_operand:<V_elem> 0 "nonimmediate_operand") (match_operand:VQ 1 "s_register_operand")] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations) - && !BYTES_BIG_ENDIAN" + "ARM_HAVE_NEON_<MODE>_ARITH && !BYTES_BIG_ENDIAN" { rtx step1 = gen_reg_rtx (<V_HALF>mode); @@ -1573,6 +1494,30 @@ [(set_attr "type" "neon_qsub<q>")] ) +(define_expand "vec_cmp<mode><v_cmp_result>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") + (match_operator:<V_cmp_result> 1 "comparison_operator" + [(match_operand:VDQW 2 "s_register_operand") + (match_operand:VDQW 3 "reg_or_zero_operand")]))] + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VDQIW 0 "s_register_operand") + (match_operator:VDQIW 1 "comparison_operator" + [(match_operand:VDQIW 2 "s_register_operand") + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] + "TARGET_NEON" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + ;; Conditional instructions. These are comparisons with conditional moves for ;; vectors. They perform the assignment: ;; @@ -1586,230 +1531,53 @@ (if_then_else:VDQW (match_operator 3 "comparison_operator" [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "nonmemory_operand")]) + (match_operand:VDQW 5 "reg_or_zero_operand")]) (match_operand:VDQW 1 "s_register_operand") (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { - int inverse = 0; - int use_zero_form = 0; - int swap_bsl_operands = 0; - rtx mask = gen_reg_rtx (<V_cmp_result>mode); - rtx tmp = gen_reg_rtx (<V_cmp_result>mode); - - rtx (*base_comparison) (rtx, rtx, rtx); - rtx (*complimentary_comparison) (rtx, rtx, rtx); - - switch (GET_CODE (operands[3])) - { - case GE: - case GT: - case LE: - case LT: - case EQ: - if (operands[5] == CONST0_RTX (<MODE>mode)) - { - use_zero_form = 1; - break; - } - /* Fall through. */ - default: - if (!REG_P (operands[5])) - operands[5] = force_reg (<MODE>mode, operands[5]); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case UNLT: - inverse = 1; - /* Fall through. */ - case GE: - case UNGE: - case ORDERED: - case UNORDERED: - base_comparison = gen_neon_vcge<mode>; - complimentary_comparison = gen_neon_vcgt<mode>; - break; - case LE: - case UNLE: - inverse = 1; - /* Fall through. 
*/ - case GT: - case UNGT: - base_comparison = gen_neon_vcgt<mode>; - complimentary_comparison = gen_neon_vcge<mode>; - break; - case EQ: - case NE: - case UNEQ: - base_comparison = gen_neon_vceq<mode>; - complimentary_comparison = gen_neon_vceq<mode>; - break; - default: - gcc_unreachable (); - } - - switch (GET_CODE (operands[3])) - { - case LT: - case LE: - case GT: - case GE: - case EQ: - /* The easy case. Here we emit one of vcge, vcgt or vceq. - As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: - a GE b -> a GE b - a GT b -> a GT b - a LE b -> b GE a - a LT b -> b GT a - a EQ b -> a EQ b - Note that there also exist direct comparison against 0 forms, - so catch those as a special case. */ - if (use_zero_form) - { - inverse = 0; - switch (GET_CODE (operands[3])) - { - case LT: - base_comparison = gen_neon_vclt<mode>; - break; - case LE: - base_comparison = gen_neon_vcle<mode>; - break; - default: - /* Do nothing, other zero form cases already have the correct - base_comparison. */ - break; - } - } - - if (!inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - break; - case UNLT: - case UNLE: - case UNGT: - case UNGE: - case NE: - /* Vector compare returns false for lanes which are unordered, so if we use - the inverse of the comparison we actually want to emit, then - swap the operands to BSL, we will end up with the correct result. - Note that a NE NaN and NaN NE b are true for all a, b. - - Our transformations are: - a GE b -> !(b GT a) - a GT b -> !(b GE a) - a LE b -> !(a GT b) - a LT b -> !(a GE b) - a NE b -> !(a EQ b) */ - - if (inverse) - emit_insn (base_comparison (mask, operands[4], operands[5])); - else - emit_insn (complimentary_comparison (mask, operands[5], operands[4])); - - swap_bsl_operands = 1; - break; - case UNEQ: - /* We check (a > b || b > a). combining these comparisons give us - true iff !(a != b && a ORDERED b), swapping the operands to BSL - will then give us (a == b || a UNORDERED b) as intended. */ - - emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5])); - emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4])); - emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); - swap_bsl_operands = 1; - break; - case UNORDERED: - /* Operands are ORDERED iff (a > b || b >= a). - Swapping the operands to BSL will give the UNORDERED case. */ - swap_bsl_operands = 1; - /* Fall through. 
*/ - case ORDERED: - emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5])); - emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4])); - emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp)); - break; - default: - gcc_unreachable (); - } + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) - if (swap_bsl_operands) - emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], - operands[2])); +(define_expand "vcond<V_cvtto><mode>" + [(set (match_operand:<V_CVTTO> 0 "s_register_operand") + (if_then_else:<V_CVTTO> + (match_operator 3 "comparison_operator" + [(match_operand:V32 4 "s_register_operand") + (match_operand:V32 5 "reg_or_zero_operand")]) + (match_operand:<V_CVTTO> 1 "s_register_operand") + (match_operand:<V_CVTTO> 2 "s_register_operand")))] + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); DONE; }) -(define_expand "vcondu<mode><mode>" - [(set (match_operand:VDQIW 0 "s_register_operand") - (if_then_else:VDQIW +(define_expand "vcondu<mode><v_cmp_result>" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW (match_operator 3 "arm_comparison_operator" - [(match_operand:VDQIW 4 "s_register_operand") - (match_operand:VDQIW 5 "s_register_operand")]) - (match_operand:VDQIW 1 "s_register_operand") - (match_operand:VDQIW 2 "s_register_operand")))] + [(match_operand:<V_cmp_result> 4 "s_register_operand") + (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")]) + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] "TARGET_NEON" { - rtx mask; - int inverse = 0, immediate_zero = 0; - - mask = gen_reg_rtx (<V_cmp_result>mode); - - if (operands[5] == CONST0_RTX (<MODE>mode)) - immediate_zero = 1; - else if (!REG_P (operands[5])) - operands[5] = force_reg (<MODE>mode, operands[5]); - - switch (GET_CODE (operands[3])) - { - case GEU: - emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5])); - break; - - case GTU: - emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5])); - break; - - case EQ: - emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); - break; - - case LEU: - if (immediate_zero) - emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4])); - break; - - case LTU: - if (immediate_zero) - emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5])); - else - emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4])); - break; - - case NE: - emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5])); - inverse = 1; - break; - - default: - gcc_unreachable (); - } - - if (inverse) - emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2], - operands[1])); - else - emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1], - operands[2])); + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) +(define_expand "vcond_mask_<mode><v_cmp_result>" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operand:<V_cmp_result> 3 "s_register_operand") + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "TARGET_NEON" +{ + emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1], + operands[2])); DONE; }) @@ -1823,7 +1591,7 @@ (match_operand:VCVTF 2 "s_register_operand")] "TARGET_NEON" { - if (!<Is_float_mode> || 
flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON_<MODE>_ARITH) emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); else emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], @@ -1837,7 +1605,7 @@ (match_operand:VH 2 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); DONE; }) @@ -1847,7 +1615,7 @@ (match_operand:VH 2 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2])); + emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); DONE; }) @@ -1942,17 +1710,6 @@ (const_string "neon_mul_<V_elem_ch><q>")))] ) -(define_insn "mul<mode>3" - [(set - (match_operand:VH 0 "s_register_operand" "=w") - (mult:VH - (match_operand:VH 1 "s_register_operand" "w") - (match_operand:VH 2 "s_register_operand" "w")))] - "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" - "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" - [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] -) - (define_insn "neon_vmulf<mode>" [(set (match_operand:VH 0 "s_register_operand" "=w") @@ -1971,7 +1728,7 @@ (match_operand:VDQW 3 "s_register_operand")] "TARGET_NEON" { - if (!<Is_float_mode> || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON_<MODE>_ARITH) emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], operands[2], operands[3])); else @@ -1999,8 +1756,8 @@ (match_operand:VH 3 "s_register_operand")] "TARGET_NEON_FP16INST" { - emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], - operands[1])); + emit_insn (gen_fma<mode>4 (operands[0], operands[2], operands[3], + operands[1])); DONE; }) @@ -2462,7 +2219,7 @@ (match_operand:VDQW 3 "s_register_operand")] "TARGET_NEON" { - if (!<Is_float_mode> || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON_<MODE>_ARITH) emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], operands[1], operands[2], operands[3])); else @@ -2569,7 +2326,7 @@ (match_operand:VCVTF 2 "s_register_operand")] "TARGET_NEON" { - if (!<Is_float_mode> || flag_unsafe_math_optimizations) + if (ARM_HAVE_NEON_<MODE>_ARITH) emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); else emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], @@ -2644,7 +2401,7 @@ ;; These may expand to an UNSPEC pattern when a floating point mode is used ;; without unsafe math optimizations. 
-(define_expand "neon_vc<cmp_op><mode>" +(define_expand "@neon_vc<cmp_op><mode>" [(match_operand:<V_cmp_result> 0 "s_register_operand") (neg:<V_cmp_result> (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand") @@ -2684,7 +2441,7 @@ } ) -(define_insn "neon_vc<cmp_op><mode>_insn" +(define_insn "@neon_vc<cmp_op><mode>_insn" [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") (neg:<V_cmp_result> (COMPARISONS:<V_cmp_result> @@ -2728,7 +2485,7 @@ [(set_attr "type" "neon_fp_compare_s<q>")] ) -(define_expand "neon_vc<cmp_op><mode>" +(define_expand "@neon_vc<cmp_op><mode>" [(match_operand:<V_cmp_result> 0 "s_register_operand") (neg:<V_cmp_result> (COMPARISONS:VH @@ -2794,7 +2551,7 @@ } [(set_attr "type" "neon_fp_compare_s<q>")]) -(define_insn "neon_vc<cmp_op>u<mode>" +(define_insn "@neon_vc<code><mode>" [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") (neg:<V_cmp_result> (GTUGEU:<V_cmp_result> @@ -4751,7 +4508,7 @@ if (BYTES_BIG_ENDIAN) [(set_attr "type" "neon_bsl<q>")] ) -(define_expand "neon_vbsl<mode>" +(define_expand "@neon_vbsl<mode>" [(set (match_operand:VDQX 0 "s_register_operand") (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand") (match_operand:VDQX 2 "s_register_operand") @@ -6658,7 +6415,7 @@ if (BYTES_BIG_ENDIAN) [(set (match_operand:VF 0 "s_register_operand" "=w") (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w") (match_operand:VF 2 "s_register_operand" "w"))))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_<MODE>_ARITH" "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_fp_abd_s<q>")] ) @@ -6668,7 +6425,7 @@ if (BYTES_BIG_ENDIAN) (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w") (match_operand:VF 2 "s_register_operand" "w")] UNSPEC_VSUB)))] - "TARGET_NEON && flag_unsafe_math_optimizations" + "ARM_HAVE_NEON_<MODE>_ARITH" "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_fp_abd_s<q>")] ) diff --git a/gcc/config/arm/parsecpu.awk b/gcc/config/arm/parsecpu.awk index 7fc3754..9423e8a 100644 --- a/gcc/config/arm/parsecpu.awk +++ b/gcc/config/arm/parsecpu.awk @@ -190,6 +190,23 @@ function gen_isa () { ORS = z print "\n" } + + print "struct fbit_implication {" + print " /* Represents a feature implication, where:" + print " ante IMPLIES cons" + print " meaning that if ante is enabled then we should" + print " also implicitly enable cons. */" + print " enum isa_feature ante;" + print " enum isa_feature cons;" + print "};\n" + print "static const struct fbit_implication all_implied_fbits[] =" + print "{" + for (impl in implied_bits) { + split (impl, impl_parts, SUBSEP) + print " { isa_bit_" impl_parts[2] ", isa_bit_" impl_parts[1] " }," + } + print " { isa_nobit, isa_nobit }" + print "};\n" } function gen_data () { @@ -600,6 +617,40 @@ BEGIN { parse_ok = 1 } +/^define implied / { + if (NF < 4) fatal("syntax: define implied <name> [<feature-or-fgroup>]+\n" \ + "Implied bits must be defined with at least one antecedent.") + toplevel() + fbit = $3 + if (fbit in features) fatal("implied feature " fbit " aliases a real feature") + if (fbit in fgroup) fatal("implied feature " fbit " aliases a feature group") + fcount = NF + features[fbit] = 1 + for (n = 4; n <= fcount; n++) { + ante = $n + if (fbit == ante) fatal("feature cannot imply itself") + else if (ante in features) { + for (impl in implied_bits) { + split(impl, impl_sep, SUBSEP) + if (ante == impl_sep[1]) + fatal(ante " implies implied bit " fbit \ + ". 
Chained implications not currently supported") + } + implied_bits[fbit, ante] = 1 + } else if (ante in fgroup) { + for (bitcomb in fgrp_bits) { + split(bitcomb, bitsep, SUBSEP) + if (bitsep[1] == ante) { + implied_bits[fbit, bitsep[2]] = 1 + } + } + } else { + fatal("implied bit antecedent " ante " unrecognized") + } + } + parse_ok = 1 +} + /^begin fpu / { if (NF != 3) fatal("syntax: begin fpu <name>") toplevel() diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 0a2399d..a3844e9 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -519,3 +519,803 @@ UNSPEC_BFMAB UNSPEC_BFMAT ]) + +;; Enumerators for MVE unspecs. +(define_c_enum "unspec" [ + VST4Q + VRNDXQ_F + VRNDQ_F + VRNDPQ_F + VRNDNQ_F + VRNDMQ_F + VRNDAQ_F + VREV64Q_F + VNEGQ_F + VDUPQ_N_F + VABSQ_F + VREV32Q_F + VCVTTQ_F32_F16 + VCVTBQ_F32_F16 + VCVTQ_TO_F_S + VQNEGQ_S + VCVTQ_TO_F_U + VREV16Q_S + VREV16Q_U + VADDLVQ_S + VMVNQ_N_S + VMVNQ_N_U + VCVTAQ_S + VCVTAQ_U + VREV64Q_S + VREV64Q_U + VQABSQ_S + VNEGQ_S + VMVNQ_S + VMVNQ_U + VDUPQ_N_U + VDUPQ_N_S + VCLZQ_U + VCLZQ_S + VCLSQ_S + VADDVQ_S + VADDVQ_U + VABSQ_S + VREV32Q_U + VREV32Q_S + VMOVLTQ_U + VMOVLTQ_S + VMOVLBQ_S + VMOVLBQ_U + VCVTQ_FROM_F_S + VCVTQ_FROM_F_U + VCVTPQ_S + VCVTPQ_U + VCVTNQ_S + VCVTNQ_U + VCVTMQ_S + VCVTMQ_U + VADDLVQ_U + VCTP8Q + VCTP16Q + VCTP32Q + VCTP64Q + VPNOT + VCREATEQ_F + VCVTQ_N_TO_F_S + VCVTQ_N_TO_F_U + VBRSRQ_N_F + VSUBQ_N_F + VCREATEQ_U + VCREATEQ_S + VSHRQ_N_S + VSHRQ_N_U + VCVTQ_N_FROM_F_S + VCVTQ_N_FROM_F_U + VADDLVQ_P_S + VADDLVQ_P_U + VCMPNEQ_U + VCMPNEQ_S + VSHLQ_S + VSHLQ_U + VABDQ_S + VADDQ_N_S + VADDVAQ_S + VADDVQ_P_S + VANDQ_S + VBICQ_S + VBRSRQ_N_S + VCADDQ_ROT270_S + VCADDQ_ROT90_S + VCMPEQQ_S + VCMPEQQ_N_S + VCMPNEQ_N_S + VEORQ_S + VHADDQ_S + VHADDQ_N_S + VHSUBQ_S + VHSUBQ_N_S + VMAXQ_S + VMAXVQ_S + VMINQ_S + VMINVQ_S + VMLADAVQ_S + VMULHQ_S + VMULLBQ_INT_S + VMULLTQ_INT_S + VMULQ_S + VMULQ_N_S + VORNQ_S + VORRQ_S + VQADDQ_S + VQADDQ_N_S + VQRSHLQ_S + VQRSHLQ_N_S + VQSHLQ_S + VQSHLQ_N_S + VQSHLQ_R_S + VQSUBQ_S + VQSUBQ_N_S + VRHADDQ_S + VRMULHQ_S + VRSHLQ_S + VRSHLQ_N_S + VRSHRQ_N_S + VSHLQ_N_S + VSHLQ_R_S + VSUBQ_S + VSUBQ_N_S + VABDQ_U + VADDQ_N_U + VADDVAQ_U + VADDVQ_P_U + VANDQ_U + VBICQ_U + VBRSRQ_N_U + VCADDQ_ROT270_U + VCADDQ_ROT90_U + VCMPEQQ_U + VCMPEQQ_N_U + VCMPNEQ_N_U + VEORQ_U + VHADDQ_U + VHADDQ_N_U + VHSUBQ_U + VHSUBQ_N_U + VMAXQ_U + VMAXVQ_U + VMINQ_U + VMINVQ_U + VMLADAVQ_U + VMULHQ_U + VMULLBQ_INT_U + VMULLTQ_INT_U + VMULQ_U + VMULQ_N_U + VORNQ_U + VORRQ_U + VQADDQ_U + VQADDQ_N_U + VQRSHLQ_U + VQRSHLQ_N_U + VQSHLQ_U + VQSHLQ_N_U + VQSHLQ_R_U + VQSUBQ_U + VQSUBQ_N_U + VRHADDQ_U + VRMULHQ_U + VRSHLQ_U + VRSHLQ_N_U + VRSHRQ_N_U + VSHLQ_N_U + VSHLQ_R_U + VSUBQ_U + VSUBQ_N_U + VCMPGEQ_N_S + VCMPGEQ_S + VCMPGTQ_N_S + VCMPGTQ_S + VCMPLEQ_N_S + VCMPLEQ_S + VCMPLTQ_N_S + VCMPLTQ_S + VHCADDQ_ROT270_S + VHCADDQ_ROT90_S + VMAXAQ_S + VMAXAVQ_S + VMINAQ_S + VMINAVQ_S + VMLADAVXQ_S + VMLSDAVQ_S + VMLSDAVXQ_S + VQDMULHQ_N_S + VQDMULHQ_S + VQRDMULHQ_N_S + VQRDMULHQ_S + VQSHLUQ_N_S + VCMPCSQ_N_U + VCMPCSQ_U + VCMPHIQ_N_U + VCMPHIQ_U + VABDQ_M_S + VABDQ_M_U + VABDQ_F + VADDQ_N_F + VANDQ_F + VBICQ_F + VCADDQ_ROT270_F + VCADDQ_ROT90_F + VCMPEQQ_F + VCMPEQQ_N_F + VCMPGEQ_F + VCMPGEQ_N_F + VCMPGTQ_F + VCMPGTQ_N_F + VCMPLEQ_F + VCMPLEQ_N_F + VCMPLTQ_F + VCMPLTQ_N_F + VCMPNEQ_F + VCMPNEQ_N_F + VCMULQ_F + VCMULQ_ROT180_F + VCMULQ_ROT270_F + VCMULQ_ROT90_F + VEORQ_F + VMAXNMAQ_F + VMAXNMAVQ_F + VMAXNMQ_F + VMAXNMVQ_F + VMINNMAQ_F + VMINNMAVQ_F + VMINNMQ_F + VMINNMVQ_F + VMULQ_F + VMULQ_N_F + VORNQ_F + 
VORRQ_F + VSUBQ_F + VADDLVAQ_U + VADDLVAQ_S + VBICQ_N_U + VBICQ_N_S + VCTP8Q_M + VCTP16Q_M + VCTP32Q_M + VCTP64Q_M + VCVTBQ_F16_F32 + VCVTTQ_F16_F32 + VMLALDAVQ_U + VMLALDAVXQ_U + VMLALDAVXQ_S + VMLALDAVQ_S + VMLSLDAVQ_S + VMLSLDAVXQ_S + VMOVNBQ_U + VMOVNBQ_S + VMOVNTQ_U + VMOVNTQ_S + VORRQ_N_S + VORRQ_N_U + VQDMULLBQ_N_S + VQDMULLBQ_S + VQDMULLTQ_N_S + VQDMULLTQ_S + VQMOVNBQ_U + VQMOVNBQ_S + VQMOVUNBQ_S + VQMOVUNTQ_S + VRMLALDAVHXQ_S + VRMLSLDAVHQ_S + VRMLSLDAVHXQ_S + VSHLLBQ_S + VSHLLBQ_U + VSHLLTQ_U + VSHLLTQ_S + VQMOVNTQ_U + VQMOVNTQ_S + VSHLLBQ_N_S + VSHLLBQ_N_U + VSHLLTQ_N_U + VSHLLTQ_N_S + VRMLALDAVHQ_U + VRMLALDAVHQ_S + VMULLTQ_POLY_P + VMULLBQ_POLY_P + VBICQ_M_N_S + VBICQ_M_N_U + VCMPEQQ_M_F + VCVTAQ_M_S + VCVTAQ_M_U + VCVTQ_M_TO_F_S + VCVTQ_M_TO_F_U + VQRSHRNBQ_N_U + VQRSHRNBQ_N_S + VQRSHRUNBQ_N_S + VRMLALDAVHAQ_S + VABAVQ_S + VABAVQ_U + VSHLCQ_S + VSHLCQ_U + VRMLALDAVHAQ_U + VABSQ_M_S + VADDVAQ_P_S + VADDVAQ_P_U + VCLSQ_M_S + VCLZQ_M_S + VCLZQ_M_U + VCMPCSQ_M_N_U + VCMPCSQ_M_U + VCMPEQQ_M_N_S + VCMPEQQ_M_N_U + VCMPEQQ_M_S + VCMPEQQ_M_U + VCMPGEQ_M_N_S + VCMPGEQ_M_S + VCMPGTQ_M_N_S + VCMPGTQ_M_S + VCMPHIQ_M_N_U + VCMPHIQ_M_U + VCMPLEQ_M_N_S + VCMPLEQ_M_S + VCMPLTQ_M_N_S + VCMPLTQ_M_S + VCMPNEQ_M_N_S + VCMPNEQ_M_N_U + VCMPNEQ_M_S + VCMPNEQ_M_U + VDUPQ_M_N_S + VDUPQ_M_N_U + VDWDUPQ_N_U + VDWDUPQ_WB_U + VIWDUPQ_N_U + VIWDUPQ_WB_U + VMAXAQ_M_S + VMAXAVQ_P_S + VMAXVQ_P_S + VMAXVQ_P_U + VMINAQ_M_S + VMINAVQ_P_S + VMINVQ_P_S + VMINVQ_P_U + VMLADAVAQ_S + VMLADAVAQ_U + VMLADAVQ_P_S + VMLADAVQ_P_U + VMLADAVXQ_P_S + VMLAQ_N_S + VMLAQ_N_U + VMLASQ_N_S + VMLASQ_N_U + VMLSDAVQ_P_S + VMLSDAVXQ_P_S + VMVNQ_M_S + VMVNQ_M_U + VNEGQ_M_S + VPSELQ_S + VPSELQ_U + VQABSQ_M_S + VQDMLAHQ_N_S + VQDMLASHQ_N_S + VQNEGQ_M_S + VQRDMLADHQ_S + VQRDMLADHXQ_S + VQRDMLAHQ_N_S + VQRDMLASHQ_N_S + VQRDMLSDHQ_S + VQRDMLSDHXQ_S + VQRSHLQ_M_N_S + VQRSHLQ_M_N_U + VQSHLQ_M_R_S + VQSHLQ_M_R_U + VREV64Q_M_S + VREV64Q_M_U + VRSHLQ_M_N_S + VRSHLQ_M_N_U + VSHLQ_M_R_S + VSHLQ_M_R_U + VSLIQ_N_S + VSLIQ_N_U + VSRIQ_N_S + VSRIQ_N_U + VQDMLSDHXQ_S + VQDMLSDHQ_S + VQDMLADHXQ_S + VQDMLADHQ_S + VMLSDAVAXQ_S + VMLSDAVAQ_S + VMLADAVAXQ_S + VCMPGEQ_M_F + VCMPGTQ_M_N_F + VMLSLDAVQ_P_S + VRMLALDAVHAXQ_S + VMLSLDAVXQ_P_S + VFMAQ_F + VMLSLDAVAQ_S + VQSHRUNBQ_N_S + VQRSHRUNTQ_N_S + VCMLAQ_F + VMINNMAQ_M_F + VFMASQ_N_F + VDUPQ_M_N_F + VCMPGTQ_M_F + VCMPLTQ_M_F + VRMLSLDAVHQ_P_S + VQSHRUNTQ_N_S + VABSQ_M_F + VMAXNMAVQ_P_F + VFMAQ_N_F + VRMLSLDAVHXQ_P_S + VREV32Q_M_F + VRMLSLDAVHAQ_S + VRMLSLDAVHAXQ_S + VCMPLTQ_M_N_F + VCMPNEQ_M_F + VRNDAQ_M_F + VRNDPQ_M_F + VADDLVAQ_P_S + VQMOVUNBQ_M_S + VCMPLEQ_M_F + VCMLAQ_ROT180_F + VMLSLDAVAXQ_S + VRNDXQ_M_F + VFMSQ_F + VMINNMVQ_P_F + VMAXNMVQ_P_F + VPSELQ_F + VCMLAQ_ROT90_F + VQMOVUNTQ_M_S + VREV64Q_M_F + VNEGQ_M_F + VRNDMQ_M_F + VCMPLEQ_M_N_F + VCMPGEQ_M_N_F + VRNDNQ_M_F + VMINNMAVQ_P_F + VCMPNEQ_M_N_F + VRMLALDAVHQ_P_S + VRMLALDAVHXQ_P_S + VCMPEQQ_M_N_F + VCMLAQ_ROT270_F + VMAXNMAQ_M_F + VRNDQ_M_F + VMLALDAVQ_P_U + VMLALDAVQ_P_S + VQMOVNBQ_M_S + VQMOVNBQ_M_U + VMOVLTQ_M_U + VMOVLTQ_M_S + VMOVNBQ_M_U + VMOVNBQ_M_S + VRSHRNTQ_N_U + VRSHRNTQ_N_S + VORRQ_M_N_S + VORRQ_M_N_U + VREV32Q_M_S + VREV32Q_M_U + VQRSHRNTQ_N_U + VQRSHRNTQ_N_S + VMOVNTQ_M_U + VMOVNTQ_M_S + VMOVLBQ_M_U + VMOVLBQ_M_S + VMLALDAVAQ_S + VMLALDAVAQ_U + VQSHRNBQ_N_U + VQSHRNBQ_N_S + VSHRNBQ_N_U + VSHRNBQ_N_S + VRSHRNBQ_N_S + VRSHRNBQ_N_U + VMLALDAVXQ_P_U + VMLALDAVXQ_P_S + VQMOVNTQ_M_U + VQMOVNTQ_M_S + VMVNQ_M_N_U + VMVNQ_M_N_S + VQSHRNTQ_N_U + VQSHRNTQ_N_S + VMLALDAVAXQ_S + VMLALDAVAXQ_U + VSHRNTQ_N_S + VSHRNTQ_N_U + VCVTBQ_M_F16_F32 + 
VCVTBQ_M_F32_F16 + VCVTTQ_M_F16_F32 + VCVTTQ_M_F32_F16 + VCVTMQ_M_S + VCVTMQ_M_U + VCVTNQ_M_S + VCVTPQ_M_S + VCVTPQ_M_U + VCVTQ_M_N_FROM_F_S + VCVTNQ_M_U + VREV16Q_M_S + VREV16Q_M_U + VREV32Q_M + VCVTQ_M_FROM_F_U + VCVTQ_M_FROM_F_S + VRMLALDAVHQ_P_U + VADDLVAQ_P_U + VCVTQ_M_N_FROM_F_U + VQSHLUQ_M_N_S + VABAVQ_P_S + VABAVQ_P_U + VSHLQ_M_S + VSHLQ_M_U + VSRIQ_M_N_S + VSRIQ_M_N_U + VSUBQ_M_U + VSUBQ_M_S + VCVTQ_M_N_TO_F_U + VCVTQ_M_N_TO_F_S + VQADDQ_M_U + VQADDQ_M_S + VRSHRQ_M_N_S + VSUBQ_M_N_S + VSUBQ_M_N_U + VBRSRQ_M_N_S + VSUBQ_M_N_F + VBICQ_M_F + VHADDQ_M_U + VBICQ_M_U + VBICQ_M_S + VMULQ_M_N_U + VHADDQ_M_S + VORNQ_M_F + VMLAQ_M_N_S + VQSUBQ_M_U + VQSUBQ_M_S + VMLAQ_M_N_U + VQSUBQ_M_N_U + VQSUBQ_M_N_S + VMULLTQ_INT_M_S + VMULLTQ_INT_M_U + VMULQ_M_N_S + VMULQ_M_N_F + VMLASQ_M_N_U + VMLASQ_M_N_S + VMAXQ_M_U + VQRDMLAHQ_M_N_U + VCADDQ_ROT270_M_F + VCADDQ_ROT270_M_U + VCADDQ_ROT270_M_S + VQRSHLQ_M_S + VMULQ_M_F + VRHADDQ_M_U + VSHRQ_M_N_U + VRHADDQ_M_S + VMULQ_M_S + VMULQ_M_U + VQDMLASHQ_M_N_S + VQRDMLASHQ_M_N_S + VRSHLQ_M_S + VRSHLQ_M_U + VRSHRQ_M_N_U + VADDQ_M_N_F + VADDQ_M_N_S + VADDQ_M_N_U + VQRDMLASHQ_M_N_U + VMAXQ_M_S + VQRDMLAHQ_M_N_S + VORRQ_M_S + VORRQ_M_U + VORRQ_M_F + VQRSHLQ_M_U + VRMULHQ_M_U + VRMULHQ_M_S + VMINQ_M_S + VMINQ_M_U + VANDQ_M_F + VANDQ_M_U + VANDQ_M_S + VHSUBQ_M_N_S + VHSUBQ_M_N_U + VMULHQ_M_S + VMULHQ_M_U + VMULLBQ_INT_M_U + VMULLBQ_INT_M_S + VCADDQ_ROT90_M_F + VSHRQ_M_N_S + VADDQ_M_U + VSLIQ_M_N_U + VQADDQ_M_N_S + VBRSRQ_M_N_F + VABDQ_M_F + VBRSRQ_M_N_U + VEORQ_M_F + VSHLQ_M_N_S + VQDMLAHQ_M_N_U + VQDMLAHQ_M_N_S + VSHLQ_M_N_U + VMLADAVAQ_P_U + VMLADAVAQ_P_S + VSLIQ_M_N_S + VQSHLQ_M_U + VQSHLQ_M_S + VCADDQ_ROT90_M_U + VCADDQ_ROT90_M_S + VORNQ_M_U + VORNQ_M_S + VQSHLQ_M_N_S + VQSHLQ_M_N_U + VADDQ_M_S + VHADDQ_M_N_S + VADDQ_M_F + VQADDQ_M_N_U + VEORQ_M_S + VEORQ_M_U + VHSUBQ_M_S + VHSUBQ_M_U + VHADDQ_M_N_U + VHCADDQ_ROT90_M_S + VQRDMLSDHQ_M_S + VQRDMLSDHXQ_M_S + VQRDMLADHXQ_M_S + VQDMULHQ_M_S + VMLADAVAXQ_P_S + VQDMLADHXQ_M_S + VQRDMULHQ_M_S + VMLSDAVAXQ_P_S + VQDMULHQ_M_N_S + VHCADDQ_ROT270_M_S + VQDMLSDHQ_M_S + VQDMLSDHXQ_M_S + VMLSDAVAQ_P_S + VQRDMLADHQ_M_S + VQDMLADHQ_M_S + VMLALDAVAQ_P_U + VMLALDAVAQ_P_S + VQRSHRNBQ_M_N_U + VQRSHRNBQ_M_N_S + VQRSHRNTQ_M_N_S + VQSHRNBQ_M_N_U + VQSHRNBQ_M_N_S + VQSHRNTQ_M_N_S + VRSHRNBQ_M_N_U + VRSHRNBQ_M_N_S + VRSHRNTQ_M_N_U + VSHLLBQ_M_N_U + VSHLLBQ_M_N_S + VSHLLTQ_M_N_U + VSHLLTQ_M_N_S + VSHRNBQ_M_N_S + VSHRNBQ_M_N_U + VSHRNTQ_M_N_S + VSHRNTQ_M_N_U + VMLALDAVAXQ_P_S + VQRSHRNTQ_M_N_U + VQSHRNTQ_M_N_U + VRSHRNTQ_M_N_S + VQRDMULHQ_M_N_S + VRMLALDAVHAQ_P_S + VMLSLDAVAQ_P_S + VMLSLDAVAXQ_P_S + VMULLBQ_POLY_M_P + VMULLTQ_POLY_M_P + VQDMULLBQ_M_N_S + VQDMULLBQ_M_S + VQDMULLTQ_M_N_S + VQDMULLTQ_M_S + VQRSHRUNBQ_M_N_S + VQSHRUNBQ_M_N_S + VQSHRUNTQ_M_N_S + VRMLALDAVHAQ_P_U + VRMLALDAVHAXQ_P_S + VRMLSLDAVHAQ_P_S + VRMLSLDAVHAXQ_P_S + VQRSHRUNTQ_M_N_S + VCMLAQ_M_F + VCMLAQ_ROT180_M_F + VCMLAQ_ROT270_M_F + VCMLAQ_ROT90_M_F + VCMULQ_M_F + VCMULQ_ROT180_M_F + VCMULQ_ROT270_M_F + VCMULQ_ROT90_M_F + VFMAQ_M_F + VFMAQ_M_N_F + VFMASQ_M_N_F + VFMSQ_M_F + VMAXNMQ_M_F + VMINNMQ_M_F + VSUBQ_M_F + VSTRWQSB_S + VSTRWQSB_U + VSTRBQSO_S + VSTRBQSO_U + VSTRBQ_S + VSTRBQ_U + VLDRBQGO_S + VLDRBQGO_U + VLDRBQ_S + VLDRBQ_U + VLDRWQGB_S + VLDRWQGB_U + VLD1Q_F + VLD1Q_S + VLD1Q_U + VLDRHQ_F + VLDRHQGO_S + VLDRHQGO_U + VLDRHQGSO_S + VLDRHQGSO_U + VLDRHQ_S + VLDRHQ_U + VLDRWQ_F + VLDRWQ_S + VLDRWQ_U + VLDRDQGB_S + VLDRDQGB_U + VLDRDQGO_S + VLDRDQGO_U + VLDRDQGSO_S + VLDRDQGSO_U + VLDRHQGO_F + VLDRHQGSO_F + VLDRWQGB_F + VLDRWQGO_F + VLDRWQGO_S + VLDRWQGO_U + 
VLDRWQGSO_F + VLDRWQGSO_S + VLDRWQGSO_U + VSTRHQ_F + VST1Q_S + VST1Q_U + VSTRHQSO_S + VSTRHQ_U + VSTRWQ_S + VSTRWQ_U + VSTRWQ_F + VST1Q_F + VSTRDQSB_S + VSTRDQSB_U + VSTRDQSO_S + VSTRDQSO_U + VSTRDQSSO_S + VSTRDQSSO_U + VSTRWQSO_S + VSTRWQSO_U + VSTRWQSSO_S + VSTRWQSSO_U + VSTRHQSO_F + VSTRHQSSO_F + VSTRWQSB_F + VSTRWQSO_F + VSTRWQSSO_F + VDDUPQ + VDDUPQ_M + VDWDUPQ + VDWDUPQ_M + VIDUPQ + VIDUPQ_M + VIWDUPQ + VIWDUPQ_M + VSTRWQSBWB_S + VSTRWQSBWB_U + VLDRWQGBWB_S + VLDRWQGBWB_U + VSTRWQSBWB_F + VLDRWQGBWB_F + VSTRDQSBWB_S + VSTRDQSBWB_U + VLDRDQGBWB_S + VLDRDQGBWB_U + VADCQ_U + VADCQ_M_U + VADCQ_S + VADCQ_M_S + VSBCIQ_U + VSBCIQ_S + VSBCIQ_M_U + VSBCIQ_M_S + VSBCQ_U + VSBCQ_S + VSBCQ_M_U + VSBCQ_M_S + VADCIQ_U + VADCIQ_M_U + VADCIQ_S + VADCIQ_M_S + VLD2Q + VLD4Q + VST2Q + VSHLCQ_M_U + VSHLCQ_M_S + VSTRHQSO_U + VSTRHQSSO_S + VSTRHQSSO_U + VSTRHQ_S + SRSHRL + SRSHR + URSHR + URSHRL + SQRSHR + UQRSHL + UQRSHLL_64 + UQRSHLL_48 + SQRSHRL_64 + SQRSHRL_48 + VSHLCQ_M_ +]) diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index b7e3619..250e503 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -81,104 +81,53 @@ ;; patterns separately for Neon, IWMMXT and MVE. (define_expand "add<mode>3" - [(set (match_operand:VNIM 0 "s_register_operand") - (plus:VNIM (match_operand:VNIM 1 "s_register_operand") - (match_operand:VNIM 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode)) - || (TARGET_HAVE_MVE && VALID_MVE_SI_MODE(<MODE>mode)) - || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(<MODE>mode))" -{ -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and MVE. - -(define_expand "addv8hf3" - [(set (match_operand:V8HF 0 "s_register_operand") - (plus:V8HF (match_operand:V8HF 1 "s_register_operand") - (match_operand:V8HF 2 "s_register_operand")))] - "(TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE(V8HFmode)) - || (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations)" -{ - if (TARGET_NEON_FP16INST && flag_unsafe_math_optimizations) - emit_insn (gen_addv8hf3_neon (operands[0], operands[1], operands[2])); -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for Neon and IWMMXT. - -(define_expand "add<mode>3" - [(set (match_operand:VNINOTM 0 "s_register_operand") - (plus:VNINOTM (match_operand:VNINOTM 1 "s_register_operand") - (match_operand:VNINOTM 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) - -;; Vector arithmetic. Expanders are blank, then unnamed insns implement -;; patterns separately for IWMMXT and Neon. 
+ [(set (match_operand:VDQ 0 "s_register_operand") + (plus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "sub<mode>3" - [(set (match_operand:VALL 0 "s_register_operand") - (minus:VALL (match_operand:VALL 1 "s_register_operand") - (match_operand:VALL 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + [(set (match_operand:VDQ 0 "s_register_operand") + (minus:VDQ (match_operand:VDQ 1 "s_register_operand") + (match_operand:VDQ 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "mul<mode>3" - [(set (match_operand:VALLW 0 "s_register_operand") - (mult:VALLW (match_operand:VALLW 1 "s_register_operand") - (match_operand:VALLW 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (<MODE>mode == V4HImode && TARGET_REALLY_IWMMXT)" -{ -}) + [(set (match_operand:VDQWH 0 "s_register_operand") + (mult:VDQWH (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "smin<mode>3" [(set (match_operand:VALLW 0 "s_register_operand") (smin:VALLW (match_operand:VALLW 1 "s_register_operand") (match_operand:VALLW 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "umin<mode>3" [(set (match_operand:VINTW 0 "s_register_operand") (umin:VINTW (match_operand:VINTW 1 "s_register_operand") (match_operand:VINTW 2 "s_register_operand")))] - "TARGET_NEON - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "smax<mode>3" [(set (match_operand:VALLW 0 "s_register_operand") (smax:VALLW (match_operand:VALLW 1 "s_register_operand") (match_operand:VALLW 2 "s_register_operand")))] - "(TARGET_NEON && ((<MODE>mode != V2SFmode && <MODE>mode != V4SFmode) - || flag_unsafe_math_optimizations)) - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "umax<mode>3" [(set (match_operand:VINTW 0 "s_register_operand") (umax:VINTW (match_operand:VINTW 1 "s_register_operand") (match_operand:VINTW 2 "s_register_operand")))] - "TARGET_NEON - || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" -{ -}) + "ARM_HAVE_<MODE>_ARITH" +) (define_expand "vec_perm<mode>" [(match_operand:VE 0 "s_register_operand") diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 6a2bc5a..e6c287c 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -387,31 +387,15 @@ (set_attr "arch" "t2,any,any,any,a,t2,any,any,any,any,any,any")] ) -(define_insn "*mov_load_vfp_hf16" - [(set (match_operand:HF 0 "s_register_operand" "=t") - (match_operand:HF 1 "memory_operand" "Uj"))] - "TARGET_HAVE_MVE_FLOAT" - "vldr.16\\t%0, %E1" -) - -(define_insn "*mov_store_vfp_hf16" - [(set (match_operand:HF 0 "memory_operand" "=Uj") - (match_operand:HF 1 "s_register_operand" "t"))] - "TARGET_HAVE_MVE_FLOAT" - "vstr.16\\t%1, %E0" -) - ;; HFmode and BFmode moves (define_insn "*mov<mode>_vfp_<mode>16" [(set (match_operand:HFBF 0 "nonimmediate_operand" - "= ?r,?m,t,r,t,r,t, t, Um,r") + "= ?r,?m,t,r,t,r,t, t, 
Uj,r") (match_operand:HFBF 1 "general_operand" - " m,r,t,r,r,t,Dv,Um,t, F"))] + " m,r,t,r,r,t,Dv,Uj,t, F"))] "TARGET_32BIT - && TARGET_VFP_FP16INST - && arm_mve_mode_and_operands_type_check (<MODE>mode, operands[0], - operands[1]) + && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE) && (s_register_operand (operands[0], <MODE>mode) || s_register_operand (operands[1], <MODE>mode))" { @@ -430,9 +414,15 @@ case 6: /* S register from immediate. */ return \"vmov.f16\\t%0, %1\t%@ __<fporbf>\"; case 7: /* S register from memory. */ - return \"vld1.16\\t{%z0}, %A1\"; + if (TARGET_HAVE_MVE) + return \"vldr.16\\t%0, %1\"; + else + return \"vld1.16\\t{%z0}, %A1\"; case 8: /* Memory from S register. */ - return \"vst1.16\\t{%z1}, %A0\"; + if (TARGET_HAVE_MVE) + return \"vstr.16\\t%1, %0\"; + else + return \"vst1.16\\t{%z1}, %A0\"; case 9: /* ARM register from constant. */ { long bits; @@ -2135,7 +2125,7 @@ (match_operand:DF 1 "const_double_operand" "F")) (clobber (match_operand:DF 2 "s_register_operand" "=r"))] "arm_disable_literal_pool - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !arm_const_double_rtx (operands[1]) && !(TARGET_VFP_DOUBLE && vfp3_const_double_rtx (operands[1]))" "#" @@ -2161,7 +2151,7 @@ (match_operand:SF 1 "const_double_operand" "E")) (clobber (match_operand:SF 2 "s_register_operand" "=r"))] "arm_disable_literal_pool - && TARGET_HARD_FLOAT + && TARGET_VFP_BASE && !vfp3_const_double_rtx (operands[1])" "#" "" diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h index 2ebfce8..487ec0f 100644 --- a/gcc/config/arm/vxworks.h +++ b/gcc/config/arm/vxworks.h @@ -44,7 +44,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see \ if (arm_arch_xscale) \ builtin_define ("_VX_CPU=XSCALE"); \ - if (arm_arch8) \ + else if (arm_arch8) \ builtin_define ("_VX_CPU=ARMARCH8A"); \ else if (arm_arch7) \ { \ diff --git a/gcc/config/bpf/bpf.md b/gcc/config/bpf/bpf.md index 769d8ea..8e7cf50 100644 --- a/gcc/config/bpf/bpf.md +++ b/gcc/config/bpf/bpf.md @@ -165,6 +165,16 @@ "div<msuffix>\t%0,%2" [(set_attr "type" "<mtype>")]) +;; However, xBPF does provide a signed division operator, sdiv. + +(define_insn "div<AM:mode>3" + [(set (match_operand:AM 0 "register_operand" "=r,r") + (div:AM (match_operand:AM 1 "register_operand" " 0,0") + (match_operand:AM 2 "reg_or_imm_operand" "r,I")))] + "TARGET_XBPF" + "sdiv<msuffix>\t%0,%2" + [(set_attr "type" "<mtype>")]) + ;;; Modulus ;; Note that eBPF doesn't provide instructions for signed integer @@ -178,6 +188,16 @@ "mod<msuffix>\t%0,%2" [(set_attr "type" "<mtype>")]) +;; Again, xBPF provides a signed version, smod. 
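As a sketch of what the xBPF-only patterns enable (the sdiv insn above and the smod insn just below), not taken from the patch itself: signed C division and modulus can then expand to single instructions, whereas plain eBPF has no signed divide/modulus insn. The -mxbpf option name is assumed from the TARGET_XBPF condition.

/* Compile for xBPF (e.g. -mxbpf, assumed): each operation below maps to
   one insn via the new div/mod patterns; plain eBPF cannot do this.  */
long
signed_divmod (long a, long b, long *rem)
{
  *rem = a % b;		/* smod */
  return a / b;		/* sdiv */
}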
+ +(define_insn "mod<AM:mode>3" + [(set (match_operand:AM 0 "register_operand" "=r,r") + (mod:AM (match_operand:AM 1 "register_operand" " 0,0") + (match_operand:AM 2 "reg_or_imm_operand" "r,I")))] + "TARGET_XBPF" + "smod<msuffix>\t%0,%2" + [(set_attr "type" "<mtype>")]) + ;;; Logical AND (define_insn "and<AM:mode>3" [(set (match_operand:AM 0 "register_operand" "=r,r") diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h index 54cd1e4..49c540f 100644 --- a/gcc/config/darwin-protos.h +++ b/gcc/config/darwin-protos.h @@ -125,6 +125,6 @@ extern bool darwin_kextabi_p (void); extern void darwin_override_options (void); extern void darwin_patch_builtins (void); extern void darwin_rename_builtins (void); -extern bool darwin_libc_has_function (enum function_class fn_class); +extern bool darwin_libc_has_function (enum function_class fn_class, tree); #endif /* CONFIG_DARWIN_PROTOS_H */ diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def index 98677f6..65bf5ad 100644 --- a/gcc/config/darwin-sections.def +++ b/gcc/config/darwin-sections.def @@ -198,3 +198,18 @@ DEF_SECTION (objc2_image_info_section, 0, ".section __DATA, __objc_imageinfo, regular, no_dead_strip", 1) DEF_SECTION (objc2_constant_string_object_section, 0, ".section __DATA, __objc_stringobj, regular, no_dead_strip", 1) + +/* Additions for compatibility with later runtime conventions especially for + sections containing strings. */ +DEF_SECTION (objc2_data_section, 0, ".section __DATA, __data", 1) + +DEF_SECTION (objc2_ivar_section, 0, ".section __DATA, __objc_ivar", 1) + +DEF_SECTION (objc2_class_names_section, 0, + ".section __TEXT, __objc_classname, cstring_literals", 1) + +DEF_SECTION (objc2_method_names_section, 0, + ".section __TEXT, __objc_methname, cstring_literals", 1) + +DEF_SECTION (objc2_method_types_section, 0, + ".section __TEXT, __objc_methtype, cstring_literals", 1) diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c index c8edfb8..dd4857f 100644 --- a/gcc/config/darwin.c +++ b/gcc/config/darwin.c @@ -136,7 +136,7 @@ output_objc_section_asm_op (const void *directive) order in the object. The code below implements this by emitting a section header for each ObjC section the first time that an ObjC section is requested. */ - if (! been_here) + if (darwin_symbol_stubs && ! 
been_here) { section *saved_in_section = in_section; static const enum darwin_section_enum tomark[] = @@ -174,20 +174,23 @@ output_objc_section_asm_op (const void *directive) /* ABI=2 */ static const enum darwin_section_enum tomarkv2[] = { + objc2_method_names_section, objc2_message_refs_section, + objc2_selector_refs_section, + objc2_ivar_section, objc2_classdefs_section, objc2_metadata_section, objc2_classrefs_section, + objc2_class_names_section, objc2_classlist_section, objc2_categorylist_section, - objc2_selector_refs_section, objc2_nonlazy_class_section, objc2_nonlazy_category_section, objc2_protocollist_section, objc2_protocolrefs_section, objc2_super_classrefs_section, + objc2_constant_string_object_section, objc2_image_info_section, - objc2_constant_string_object_section } ; size_t i; @@ -1436,7 +1439,7 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); p = IDENTIFIER_POINTER (ident); - gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi == 2); + gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi == 2); objc_metadata_seen = 1; @@ -1447,11 +1450,20 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) first. */ if (!strncmp (p, "V2_BASE", 7)) return base; + else if (!strncmp (p, "V2_CNAM", 7)) + return darwin_sections[objc2_class_names_section]; + else if (!strncmp (p, "V2_MNAM", 7)) + return darwin_sections[objc2_method_names_section]; + else if (!strncmp (p, "V2_MTYP", 7)) + return darwin_sections[objc2_method_types_section]; else if (!strncmp (p, "V2_STRG", 7)) return darwin_sections[cstring_section]; else if (!strncmp (p, "G2_META", 7) || !strncmp (p, "G2_CLAS", 7)) return darwin_sections[objc2_classdefs_section]; + else if (!strncmp (p, "V2_PCOL", 7)) + return ld_uses_coal_sects ? darwin_sections[data_coal_section] + : darwin_sections[objc2_data_section]; else if (!strncmp (p, "V2_MREF", 7)) return darwin_sections[objc2_message_refs_section]; else if (!strncmp (p, "V2_CLRF", 7)) @@ -1487,6 +1499,9 @@ darwin_objc2_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) else if (!strncmp (p, "V2_CSTR", 7)) return darwin_sections[objc2_constant_string_object_section]; + else if (!strncmp (p, "V2_IVRF", 7)) + return darwin_sections[objc2_ivar_section]; + /* Not recognized, default. */ return base; } @@ -1500,7 +1515,7 @@ darwin_objc1_section (tree decl ATTRIBUTE_UNUSED, tree meta, section * base) gcc_assert (TREE_CODE (ident) == IDENTIFIER_NODE); p = IDENTIFIER_POINTER (ident); - gcc_checking_assert (flag_next_runtime == 1 && flag_objc_abi < 2); + gcc_checking_assert (flag_next_runtime >= 1 && flag_objc_abi < 2); objc_metadata_seen = 1; @@ -1861,6 +1876,14 @@ darwin_globalize_label (FILE *stream, const char *name) { if (!!strncmp (name, "_OBJC_", 6)) default_globalize_label (stream, name); + /* We have some Objective C cases that need to be global, but only on newer + OS versions. 
*/ + if (flag_objc_abi < 2 || flag_next_runtime < 100700) + return; + if (!strncmp (name+6, "LabelPro", 8)) + default_globalize_label (stream, name); + if (!strncmp (name+6, "Protocol_", 9)) + default_globalize_label (stream, name); } /* This routine returns non-zero if 'name' starts with the special objective-c @@ -1879,7 +1902,49 @@ darwin_label_is_anonymous_local_objc_name (const char *name) while (*p >= '0' && *p <= '9') p++; } - return (!strncmp ((const char *)p, "_OBJC_", 6)); + if (strncmp ((const char *)p, "_OBJC_", 6) != 0) + return false; + + /* We need some of the objective c meta-data symbols to be visible to the + linker (when the target OS version is newer). FIXME: this is horrible, + we need a better mechanism. */ + + if (flag_objc_abi < 2 || flag_next_runtime < 100700) + return true; + + p += 6; + if (!strncmp ((const char *)p, "ClassRef", 8)) + return false; + else if (!strncmp ((const char *)p, "SelRef", 6)) + return false; + else if (!strncmp ((const char *)p, "Category", 8)) + { + if (p[8] == '_' || p[8] == 'I' || p[8] == 'P' || p[8] == 'C' ) + return false; + return true; + } + else if (!strncmp ((const char *)p, "ClassMethods", 12)) + return false; + else if (!strncmp ((const char *)p, "Instance", 8)) + { + if (p[8] == 'I' || p[8] == 'M') + return false; + return true; + } + else if (!strncmp ((const char *)p, "CLASS_RO", 8)) + return false; + else if (!strncmp ((const char *)p, "METACLASS_RO", 12)) + return false; + else if (!strncmp ((const char *)p, "Protocol", 8)) + { + if (p[8] == '_' || p[8] == 'I' || p[8] == 'P' + || p[8] == 'M' || p[8] == 'C' || p[8] == 'O') + return false; + return true; + } + else if (!strncmp ((const char *)p, "LabelPro", 8)) + return false; + return true; } /* LTO support for Mach-O. @@ -2384,11 +2449,7 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, unsigned HOST_WIDE_INT size, unsigned int l2align) { - /* FIXME: We have a fudge to make this work with Java even when the target does - not use sections anchors -- Java seems to need at least one small item in a - non-zerofill segment. */ - if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) - || (size && size <= 2)) + if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) { /* Put smaller objects in _static_data, where the section anchors system can get them. @@ -2414,16 +2475,13 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, } else { - /* When we are on a non-section anchor target, we can get zero-sized - items here. However, all we need to do is to bump them to one byte - and the section alignment will take care of the rest. */ + /* When we are on a non-section anchor target (or not using section + anchors, we can get zero-sized items here. However, all we need to + do is to bump them to one byte and the section alignment will take + care of the rest. */ char secnam[64]; - unsigned int flags ; - snprintf (secnam, 64, "__DATA,__%sbss%u", ((size)?"":"zo_"), - (unsigned) l2align); - /* We can't anchor (yet, if ever) in zerofill sections, because we can't - switch to them and emit a label. 
*/ - flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + snprintf (secnam, 64, "__DATA,__bss"); + unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; in_section = get_section (secnam, flags, NULL); fprintf (fp, "\t.zerofill %s,", secnam); assemble_name (fp, name); @@ -2434,7 +2492,7 @@ darwin_emit_local_bss (FILE *fp, tree decl, const char *name, fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, (unsigned) l2align); else - fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size); } (*targetm.encode_section_info) (decl, DECL_RTL (decl), false); @@ -2559,9 +2617,8 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" return; } - /* So we have a public symbol (small item fudge for Java, see above). */ - if ((DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) - || (size && size <= 2)) + /* So we have a public symbol. */ + if (DARWIN_SECTION_ANCHORS && flag_section_anchors && size < BYTES_ZFILL) { /* Put smaller objects in data, where the section anchors system can get them. However, if they are zero-sized punt them to yet a different @@ -2586,16 +2643,10 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" } else { + /* Section anchors not in use. */ + unsigned int flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; char secnam[64]; - unsigned int flags ; - /* When we are on a non-section anchor target, we can get zero-sized - items here. However, all we need to do is to bump them to one byte - and the section alignment will take care of the rest. */ - snprintf (secnam, 64, "__DATA,__%spu_bss%u", ((size)?"":"zo_"), l2align); - - /* We can't anchor in zerofill sections, because we can't switch - to them and emit a label. */ - flags = SECTION_BSS|SECTION_WRITE|SECTION_NO_ANCHOR; + snprintf (secnam, 64, "__DATA,__common"); in_section = get_section (secnam, flags, NULL); fprintf (fp, "\t.zerofill %s,", secnam); assemble_name (fp, name); @@ -2605,7 +2656,7 @@ fprintf (fp, "# albss: %s (%lld,%d) ro %d cst %d stat %d com %d" if (l2align) fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", size, l2align); else - fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED"\n", size); + fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED",0\n", size); } (* targetm.encode_section_info) (decl, DECL_RTL (decl), false); } @@ -3141,10 +3192,14 @@ darwin_override_options (void) /* Keep track of which (major) version we're generating code for. */ if (darwin_macosx_version_min) { - if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) + if (strverscmp (darwin_macosx_version_min, "10.7") >= 0) + generating_for_darwin_version = 11; + else if (strverscmp (darwin_macosx_version_min, "10.6") >= 0) generating_for_darwin_version = 10; else if (strverscmp (darwin_macosx_version_min, "10.5") >= 0) generating_for_darwin_version = 9; + else if (strverscmp (darwin_macosx_version_min, "10.4") >= 0) + generating_for_darwin_version = 8; /* Earlier versions are not specifically accounted, until required. */ } @@ -3160,6 +3215,20 @@ darwin_override_options (void) should check for correctness re. the ABI. TODO: check and provide the flags (runtime & ABI) from the lto wrapper). */ + /* At present, make a hard update to the runtime version based on the target + OS version. 
*/ + if (flag_next_runtime) + { + if (generating_for_darwin_version > 10) + flag_next_runtime = 100705; + else if (generating_for_darwin_version > 9) + flag_next_runtime = 100608; + else if (generating_for_darwin_version > 8) + flag_next_runtime = 100508; + else + flag_next_runtime = 100000; + } + /* Unless set, force ABI=2 for NeXT and m64, 0 otherwise. */ if (!global_options_set.x_flag_objc_abi) global_options.x_flag_objc_abi @@ -3542,7 +3611,8 @@ darwin_rename_builtins (void) } bool -darwin_libc_has_function (enum function_class fn_class) +darwin_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (fn_class == function_sincos) return (strverscmp (darwin_macosx_version_min, "10.9") >= 0); diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 55a5361..f9d4fec 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -107,7 +107,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Default to using the NeXT-style runtime, since that's what is pre-installed on Darwin systems. */ -#define NEXT_OBJC_RUNTIME 1 +#define NEXT_OBJC_RUNTIME 100508 /* Don't default to pcc-struct-return, because gcc is the only compiler, and we want to retain compatibility with older gcc versions. */ @@ -476,6 +476,7 @@ extern GTY(()) int darwin_ms_struct; debugging data. */ #define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{!gdwarf*:--gstabs}}}" +#define ASM_DEBUG_OPTION_SPEC "" #define ASM_FINAL_SPEC \ "%{gsplit-dwarf:%ngsplit-dwarf is not supported on this platform} %<gsplit-dwarf" diff --git a/gcc/config/darwin9.h b/gcc/config/darwin9.h index b7bdf63..787aca7 100644 --- a/gcc/config/darwin9.h +++ b/gcc/config/darwin9.h @@ -41,6 +41,9 @@ along with GCC; see the file COPYING3. If not see #undef ASM_DEBUG_SPEC #define ASM_DEBUG_SPEC "%{g*:%{%:debug-level-gt(0):%{gstabs:--gstabs}}}" +#undef ASM_DEBUG_OPTION_SPEC +#define ASM_DEBUG_OPTION_SPEC "" + #undef ASM_OUTPUT_ALIGNED_COMMON #define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \ do { \ diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 0e73fea..763e770 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -67,6 +67,7 @@ UNSPECV_ICACHE_INV]) (define_c_enum "unspec" [ + UNSPEC_ADDPTR UNSPEC_VECTOR UNSPEC_BPERMUTE UNSPEC_SGPRBASE @@ -1219,29 +1220,47 @@ ; "addptr" is the same as "add" except that it must not write to VCC or SCC ; as a side-effect. Unfortunately GCN does not have a suitable instruction -; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp. -; Note that it is not safe to save/clobber/restore SCC because doing so will -; break data-flow analysis, so this must use vector registers. +; for this, so we use CC_SAVE_REG as a temp. +; Note that it is not safe to save/clobber/restore as separate insns because +; doing so will break data-flow analysis, so this must use multiple +; instructions in one insn. ; ; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever ; on testcase pr54713-2.c with -O0. It's only an optimization hint anyway. +; +; The SGPR alternative is preferred as it is typically used with mov_sgprbase. 
(define_insn "addptrdi3" - [(set (match_operand:DI 0 "register_operand" "= v") - (plus:DI (match_operand:DI 1 "register_operand" " v0") - (match_operand:DI 2 "nonmemory_operand" "vDA")))] + [(set (match_operand:DI 0 "register_operand" "= v, Sg") + (unspec:DI [ + (plus:DI (match_operand:DI 1 "register_operand" "^v0,Sg0") + (match_operand:DI 2 "nonmemory_operand" "vDA,SgDB"))] + UNSPEC_ADDPTR))] "" { - rtx new_operands[4] = { operands[0], operands[1], operands[2], - gen_rtx_REG (DImode, CC_SAVE_REG) }; + if (which_alternative == 0) + { + rtx new_operands[4] = { operands[0], operands[1], operands[2], + gen_rtx_REG (DImode, CC_SAVE_REG) }; - output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands); - output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands); + output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands); + output_asm_insn ("v_addc%^_u32\t%H0, %3, %H2, %H1, %3", new_operands); + } + else + { + rtx new_operands[4] = { operands[0], operands[1], operands[2], + gen_rtx_REG (BImode, CC_SAVE_REG) }; + + output_asm_insn ("s_mov_b32\t%3, scc", new_operands); + output_asm_insn ("s_add_u32\t%L0, %L1, %L2", new_operands); + output_asm_insn ("s_addc_u32\t%H0, %H1, %H2", new_operands); + output_asm_insn ("s_cmpk_lg_u32\t%3, 0", new_operands); + } return ""; } - [(set_attr "type" "vmult") - (set_attr "length" "16")]) + [(set_attr "type" "vmult,mult") + (set_attr "length" "16,24")]) ;; }}} ;; {{{ ALU special cases: Minus diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c index 0983b98..f7589a5 100644 --- a/gcc/config/gcn/mkoffload.c +++ b/gcc/config/gcn/mkoffload.c @@ -737,7 +737,8 @@ compile_native (const char *infile, const char *outfile, const char *compiler, obstack_ptr_grow (&argv_obstack, NULL); const char **new_argv = XOBFINISH (&argv_obstack, const char **); - fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true); + fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true, + ".gccnative_args"); obstack_free (&argv_obstack, NULL); } @@ -1001,7 +1002,7 @@ main (int argc, char **argv) unsetenv ("LIBRARY_PATH"); /* Run the compiler pass. */ - fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true); + fork_execute (cc_argv[0], CONST_CAST (char **, cc_argv), true, ".gcc_args"); obstack_free (&cc_argv_obstack, NULL); in = fopen (gcn_s1_name, "r"); @@ -1022,7 +1023,7 @@ main (int argc, char **argv) fclose (out); /* Run the assemble/link pass. */ - fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true); + fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args"); obstack_free (&ld_argv_obstack, NULL); in = fopen (gcn_o_name, "r"); diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h index 6c15417..6dffe45 100644 --- a/gcc/config/i386/adxintrin.h +++ b/gcc/config/i386/adxintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <adxintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <adxintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _ADXINTRIN_H_INCLUDED diff --git a/gcc/config/i386/amxbf16intrin.h b/gcc/config/i386/amxbf16intrin.h new file mode 100644 index 0000000..77cc395 --- /dev/null +++ b/gcc/config/i386/amxbf16intrin.h @@ -0,0 +1,52 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. 
+ + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <amxbf16intrin.h> directly; include <immintrin.h> instead." +#endif + +#ifndef _AMXBF16INTRIN_H_INCLUDED +#define _AMXBF16INTRIN_H_INCLUDED + +#if !defined(__AMX_BF16__) +#pragma GCC push_options +#pragma GCC target("amx-bf16") +#define __DISABLE_AMX_BF16__ +#endif /* __AMX_BF16__ */ + +#if defined(__x86_64__) && defined(__AMX_BF16__) +#define _tile_dpbf16ps_internal(dst,src1,src2) \ + __asm__ volatile\ + ("{tdpbf16ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf16ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + +#define _tile_dpbf16ps(dst,src1,src2) \ + _tile_dpbf16ps_internal (dst, src1, src2) + +#endif + +#ifdef __DISABLE_AMX_BF16__ +#undef __DISABLE_AMX_BF16__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_BF16__ */ + +#endif /* _AMXBF16INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/amxint8intrin.h b/gcc/config/i386/amxint8intrin.h new file mode 100644 index 0000000..f4e410b --- /dev/null +++ b/gcc/config/i386/amxint8intrin.h @@ -0,0 +1,61 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <amxint8intrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AMXINT8INTRIN_H_INCLUDED +#define _AMXINT8INTRIN_H_INCLUDED + +#if !defined(__AMX_INT8__) +#pragma GCC push_options +#pragma GCC target("amx-int8") +#define __DISABLE_AMX_INT8__ +#endif /* __AMX_INT8__ */ + +#if defined(__x86_64__) && defined(__AMX_INT8__) +#define _tile_int8_dp_internal(name,dst,src1,src2) \ + __asm__ volatile \ + ("{"#name"\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|"#name"\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::) + +#define _tile_dpbssd(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbssd, dst, src1, src2) + +#define _tile_dpbsud(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbsud, dst, src1, src2) + +#define _tile_dpbusd(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbusd, dst, src1, src2) + +#define _tile_dpbuud(dst,src1,src2) \ + _tile_int8_dp_internal (tdpbuud, dst, src1, src2) + +#endif + +#ifdef __DISABLE_AMX_INT8__ +#undef __DISABLE_AMX_INT8__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_INT8__ */ + +#endif /* _AMXINT8INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h new file mode 100644 index 0000000..41fb9a5 --- /dev/null +++ b/gcc/config/i386/amxtileintrin.h @@ -0,0 +1,98 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _IMMINTRIN_H_INCLUDED +#error "Never use <amxtileintrin.h> directly; include <immintrin.h> instead." 
+#endif + +#ifndef _AMXTILEINTRIN_H_INCLUDED +#define _AMXTILEINTRIN_H_INCLUDED + +#if !defined(__AMX_TILE__) +#pragma GCC push_options +#pragma GCC target("amx-tile") +#define __DISABLE_AMX_TILE__ +#endif /* __AMX_TILE__ */ + +#if defined(__x86_64__) && defined(__AMX_TILE__) +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_loadconfig (const void *__config) +{ + __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config))); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_storeconfig (void *__config) +{ + __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config))); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tile_release (void) +{ + __asm__ volatile ("tilerelease" ::); +} + +#define _tile_loadd(dst,base,stride) \ + _tile_loadd_internal (dst, base, stride) + +#define _tile_loadd_internal(dst,base,stride) \ + __asm__ volatile \ + ("{tileloadd\t(%0,%1,1), %%tmm"#dst"|tileloadd\t%%tmm"#dst", [%0+%1*1]}" \ + :: "r" ((const void*) base), "r" ((long) stride)) + +#define _tile_stream_loadd(dst,base,stride) \ + _tile_stream_loadd_internal (dst, base, stride) + +#define _tile_stream_loadd_internal(dst,base,stride) \ + __asm__ volatile \ + ("{tileloaddt1\t(%0,%1,1), %%tmm"#dst"|tileloaddt1\t%%tmm"#dst", [%0+%1*1]}" \ + :: "r" ((const void*) base), "r" ((long) stride)) + +#define _tile_stored(dst,base,stride) \ + _tile_stored_internal (dst, base, stride) + +#define _tile_stored_internal(src,base,stride) \ + __asm__ volatile \ + ("{tilestored\t%%tmm"#src", (%0,%1,1)|tilestored\t[%0+%1*1], %%tmm"#src"}" \ + :: "r" ((void*) base), "r" ((long) stride) \ + : "memory") + +#define _tile_zero(dst) \ + _tile_zero_internal (dst) + +#define _tile_zero_internal(dst) \ + __asm__ volatile \ + ("tilezero\t%%tmm"#dst ::) + +#endif + +#ifdef __DISABLE_AMX_TILE__ +#undef __DISABLE_AMX_TILE__ +#pragma GCC pop_options +#endif /* __DISABLE_AMX_TILE__ */ + +#endif /* _AMXTILEINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 6bf1f8c..e29c532 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -950,6 +950,9 @@ _mm256_broadcastsi128_si256 (__m128i __X) return (__m256i) __builtin_ia32_vbroadcastsi256 ((__v2di)__X); } +#define _mm_broadcastsi128_si256(X) _mm256_broadcastsi128_si256(X) +#define _mm_broadcastsd_pd(X) _mm_movedup_pd(X) + #ifdef __OPTIMIZE__ extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h index d19c104..3da05e1 100644 --- a/gcc/config/i386/avx512bwintrin.h +++ b/gcc/config/i386/avx512bwintrin.h @@ -36,7 +36,11 @@ /* Internal data types for implementing the intrinsics. 
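The new AMX headers above (amx-tile, amx-int8, amx-bf16) wrap the tile instructions as inline asm; roughly, they are used as below. This is only a sketch, not part of the patch: the 64-byte tile-configuration layout and the -mamx-tile/-mamx-int8 option names are assumed from the usual AMX conventions rather than from this diff.

/* Sketch: C[16][16] (int32) += A[16][64] (int8) x B (int8, already
   VNNI/K-packed), using the intrinsics added in amxtileintrin.h and
   amxint8intrin.h.  Assumes x86_64 and the AMX features enabled.  */
#include <immintrin.h>
#include <stdint.h>
#include <string.h>

void
amx_dpbssd_16x16 (int32_t *c, const int8_t *a, const int8_t *b)
{
  /* Assumed 64-byte config: byte 0 palette, byte 1 start_row,
     bytes 16..47 colsb[16] (uint16), bytes 48..63 rows[16] (uint8).  */
  struct
  {
    uint8_t palette, start_row, rsvd[14];
    uint16_t colsb[16];
    uint8_t rows[16];
  } cfg;
  memset (&cfg, 0, sizeof cfg);
  cfg.palette = 1;
  cfg.rows[0] = 16; cfg.colsb[0] = 64;	/* tmm0: C, 16x16 int32.  */
  cfg.rows[1] = 16; cfg.colsb[1] = 64;	/* tmm1: A, 16x64 int8.   */
  cfg.rows[2] = 16; cfg.colsb[2] = 64;	/* tmm2: B, packed int8.  */
  _tile_loadconfig (&cfg);

  _tile_loadd (0, c, 64);	/* accumulator tile */
  _tile_loadd (1, a, 64);
  _tile_loadd (2, b, 64);
  _tile_dpbssd (0, 1, 2);	/* tmm0 += tmm1 . tmm2 */
  _tile_stored (0, c, 64);
  _tile_release ();
}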
*/ typedef short __v32hi __attribute__ ((__vector_size__ (64))); +typedef short __v32hi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); typedef char __v64qi __attribute__ ((__vector_size__ (64))); +typedef char __v64qi_u __attribute__ ((__vector_size__ (64), \ + __may_alias__, __aligned__ (1))); typedef unsigned long long __mmask64; @@ -303,6 +307,13 @@ _mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi16 (void const *__P) +{ + return (__m512i) (*(__v32hi_u *) __P); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquhi512_mask ((const short *) __P, @@ -322,6 +333,13 @@ _mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi16 (void *__P, __m512i __A) +{ + *(__v32hi_u *) __P = (__v32hi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) { __builtin_ia32_storedquhi512_mask ((short *) __P, @@ -382,6 +400,13 @@ _kunpackd_mask64 (__mmask32 __A, __mmask32 __B) extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_loadu_epi8 (void const *__P) +{ + return (__m512i) (*(__v64qi_u *) __P); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddquqi512_mask ((const char *) __P, @@ -401,6 +426,13 @@ _mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_storeu_epi8 (void *__P, __m512i __A) +{ + *(__v64qi_u *) __P = (__v64qi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) { __builtin_ia32_storedquqi512_mask ((char *) __P, diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h index d28dfab..fd61b70 100644 --- a/gcc/config/i386/avx512dqintrin.h +++ b/gcc/config/i386/avx512dqintrin.h @@ -1168,6 +1168,17 @@ _mm_reduce_sd (__m128d __A, __m128d __B, int __C) extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_round_sd (__m128d __A, __m128d __B, int __C, const int __R) +{ + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, int __C) { @@ -1179,6 +1190,17 @@ _mm_mask_reduce_sd (__m128d __W, __mmask8 __U, __m128d __A, extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __C, const int __R) +{ + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) __W, + __U, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_reduce_sd 
(__mmask8 __U, __m128d __A, __m128d __B, int __C) { return (__m128d) __builtin_ia32_reducesd_mask ((__v2df) __A, @@ -1187,6 +1209,18 @@ _mm_maskz_reduce_sd (__mmask8 __U, __m128d __A, __m128d __B, int __C) (__mmask8) __U); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + int __C, const int __R) +{ + return (__m128d) __builtin_ia32_reducesd_mask_round ((__v2df) __A, + (__v2df) __B, __C, + (__v2df) + _mm_setzero_pd (), + __U, __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_reduce_ss (__m128 __A, __m128 __B, int __C) @@ -1197,6 +1231,16 @@ _mm_reduce_ss (__m128 __A, __m128 __B, int __C) (__mmask8) -1); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_round_ss (__m128 __A, __m128 __B, int __C, const int __R) +{ + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, __R); +} extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -1211,6 +1255,17 @@ _mm_mask_reduce_ss (__m128 __W, __mmask8 __U, __m128 __A, extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __C, const int __R) +{ + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) __W, + __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) { return (__m128) __builtin_ia32_reducess_mask ((__v4sf) __A, @@ -1219,6 +1274,18 @@ _mm_maskz_reduce_ss (__mmask8 __U, __m128 __A, __m128 __B, int __C) (__mmask8) __U); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_reduce_round_ss (__mmask8 __U, __m128 __A, __m128 __B, + int __C, const int __R) +{ + return (__m128) __builtin_ia32_reducess_mask_round ((__v4sf) __A, + (__v4sf) __B, __C, + (__v4sf) + _mm_setzero_ps (), + __U, __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_range_sd (__m128d __A, __m128d __B, int __C) @@ -1808,6 +1875,17 @@ _mm512_reduce_pd (__m512d __A, int __B) extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_round_pd (__m512d __A, int __B, const int __R) +{ + return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, + __B, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B) { return (__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, @@ -1817,6 +1895,17 @@ _mm512_mask_reduce_pd (__m512d __W, __mmask8 __U, __m512d __A, int __B) extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_round_pd (__m512d __W, __mmask8 __U, __m512d __A, + int __B, const int __R) +{ + return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, + __B, + (__v8df) __W, + __U, __R); +} + +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B) { return 
(__m512d) __builtin_ia32_reducepd512_mask ((__v8df) __A, __B, @@ -1825,6 +1914,18 @@ _mm512_maskz_reduce_pd (__mmask8 __U, __m512d __A, int __B) (__mmask8) __U); } +extern __inline __m512d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_round_pd (__mmask8 __U, __m512d __A, int __B, + const int __R) +{ + return (__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df) __A, + __B, + (__v8df) + _mm512_setzero_pd (), + __U, __R); +} + extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_reduce_ps (__m512 __A, int __B) @@ -1837,6 +1938,17 @@ _mm512_reduce_ps (__m512 __A, int __B) extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_reduce_round_ps (__m512 __A, int __B, const int __R) +{ + return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, + __B, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) -1, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B) { return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, @@ -1846,6 +1958,17 @@ _mm512_mask_reduce_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B) extern __inline __m512 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_reduce_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __B, + const int __R) +{ + return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, + __B, + (__v16sf) __W, + __U, __R); +} + +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B) { return (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, @@ -1854,6 +1977,18 @@ _mm512_maskz_reduce_ps (__mmask16 __U, __m512 __A, int __B) (__mmask16) __U); } +extern __inline __m512 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_reduce_round_ps (__mmask16 __U, __m512 __A, int __B, + const int __R) +{ + return (__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf) __A, + __B, + (__v16sf) + _mm512_setzero_ps (), + __U, __R); +} + extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_extractf32x8_ps (__m512 __A, const int __imm) @@ -2440,26 +2575,50 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1)) +#define _mm512_reduce_round_pd(A, B, R) \ + ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ + (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)-1, (R))) + #define _mm512_mask_reduce_pd(W, U, A, B) \ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ (int)(B), (__v8df)(__m512d)(W), (__mmask8)(U))) +#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \ + ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ + (int)(B), (__v8df)(__m512d)(W), (U), (R))) + #define _mm512_maskz_reduce_pd(U, A, B) \ ((__m512d) __builtin_ia32_reducepd512_mask ((__v8df)(__m512d)(A), \ (int)(B), (__v8df)_mm512_setzero_pd (), (__mmask8)(U))) +#define _mm512_maskz_reduce_round_pd(U, A, B, R) \ + ((__m512d) __builtin_ia32_reducepd512_mask_round ((__v8df)(__m512d)(A),\ + (int)(B), (__v8df)_mm512_setzero_pd (), (U), (R))) + #define _mm512_reduce_ps(A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ 
(int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1)) +#define _mm512_reduce_round_ps(A, B, R) \ + ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ + (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)-1, (R))) + #define _mm512_mask_reduce_ps(W, U, A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ (int)(B), (__v16sf)(__m512)(W), (__mmask16)(U))) +#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \ + ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ + (int)(B), (__v16sf)(__m512)(W), (U), (R))) + #define _mm512_maskz_reduce_ps(U, A, B) \ ((__m512) __builtin_ia32_reduceps512_mask ((__v16sf)(__m512)(A), \ (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U))) +#define _mm512_maskz_reduce_round_ps(U, A, B, R) \ + ((__m512) __builtin_ia32_reduceps512_mask_round ((__v16sf)(__m512)(A),\ + (int)(B), (__v16sf)_mm512_setzero_ps (), (__mmask16)(U), (R))) + #define _mm512_extractf32x8_ps(X, C) \ ((__m256) __builtin_ia32_extractf32x8_mask ((__v16sf)(__m512) (X), \ (int) (C), (__v8sf)(__m256) _mm256_setzero_ps (), (__mmask8)-1)) @@ -2679,6 +2838,20 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ (__mmask8)(U))) +#define _mm_reduce_round_sd(A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__mmask8)(U), (int)(R))) + +#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df)(__m128d)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \ + ((__m128d) __builtin_ia32_reducesd_mask_round ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(C), (__v2df) _mm_setzero_pd (), \ + (__mmask8)(U), (int)(R))) + #define _mm_reduce_ss(A, B, C) \ ((__m128) __builtin_ia32_reducess_mask ((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ @@ -2693,6 +2866,19 @@ _mm512_fpclass_ps_mask (__m512 __A, const int __imm) (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ (__mmask8)(U))) +#define _mm_reduce_round_ss(A, B, C, R) \ + ((__m128) __builtin_ia32_reducess_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__mmask8)(U), (int)(R))) + +#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \ + ((__m128) __builtin_ia32_reducess_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \ + ((__m128) __builtin_ia32_reducesd_mask_round ((__v4sf)(__m128)(A), \ + (__v4sf)(__m128)(B), (int)(C), (__v4sf) _mm_setzero_ps (), \ + (__mmask8)(U), (int)(R))) #endif diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h index b9804c9..6ec8ee2 100644 --- a/gcc/config/i386/avx512erintrin.h +++ b/gcc/config/i386/avx512erintrin.h @@ -168,6 +168,30 @@ _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __R) +{ + return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + __U, + __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R) +{ + return (__m128d) 
__builtin_ia32_rcp28sd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) + _mm_setzero_pd (), + __U, + __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) @@ -177,6 +201,30 @@ _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + __U, + __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + __U, + __R); +} + extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_rsqrt28_round_pd (__m512d __A, int __R) @@ -242,6 +290,30 @@ _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) __R); } +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128d __B, int __R) +{ + return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) __W, + __U, + __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R) +{ + return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B, + (__v2df) __A, + (__v2df) + _mm_setzero_pd (), + __U, + __R); +} + extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) @@ -251,6 +323,30 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) __W, + __U, + __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R) +{ + return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B, + (__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + __U, + __R); +} + #else #define _mm512_exp2a23_round_pd(A, C) \ __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) @@ -309,17 +405,69 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) #define _mm_rcp28_round_sd(A, B, R) \ __builtin_ia32_rcp28sd_round(A, B, R) +#define _mm_mask_rcp28_round_sd(W, U, A, B, R) \ + __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R)) + +#define _mm_maskz_rcp28_round_sd(U, A, B, R) \ + __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \ + (U), (R)) + #define _mm_rcp28_round_ss(A, B, R) \ __builtin_ia32_rcp28ss_round(A, B, R) +#define _mm_mask_rcp28_round_ss(W, U, A, B, R) \ + __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R)) + +#define _mm_maskz_rcp28_round_ss(U, A, B, R) \ + __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \ + (U), (R)) + #define _mm_rsqrt28_round_sd(A, B, R) \ 
__builtin_ia32_rsqrt28sd_round(A, B, R) +#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R) \ + __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R)) + +#define _mm_maskz_rsqrt28_round_sd(U, A, B, R) \ + __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\ + (U), (R)) + #define _mm_rsqrt28_round_ss(A, B, R) \ __builtin_ia32_rsqrt28ss_round(A, B, R) +#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R) \ + __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R)) + +#define _mm_maskz_rsqrt28_round_ss(U, A, B, R) \ + __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\ + (U), (R)) + #endif +#define _mm_mask_rcp28_sd(W, U, A, B)\ + _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_sd(U, A, B)\ + _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rcp28_ss(W, U, A, B)\ + _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rcp28_ss(U, A, B)\ + _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_sd(W, U, A, B)\ + _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_sd(U, A, B)\ + _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_rsqrt28_ss(W, U, A, B)\ + _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_rsqrt28_ss(U, A, B)\ + _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + #define _mm512_exp2a23_pd(A) \ _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 729d568..6342fde 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -2124,6 +2124,18 @@ _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) (__v4sf) _mm_setzero_ps (), U, C) #endif +#define _mm_mask_sqrt_sd(W, U, A, B) \ + _mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_sqrt_sd(U, A, B) \ + _mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_sqrt_ss(W, U, A, B) \ + _mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_sqrt_ss(U, A, B) \ + _mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cvtepi8_epi32 (__m128i __A) @@ -3259,6 +3271,18 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) (__v4sf)_mm_setzero_ps (), -1, C) #endif +#define _mm_mask_scalef_sd(W, U, A, B) \ + _mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_scalef_sd(U, A, B) \ + _mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_scalef_ss(W, U, A, B) \ + _mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_scalef_ss(U, A, B) \ + _mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + #ifdef __OPTIMIZE__ extern __inline __m512d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -8621,6 +8645,30 @@ _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R) __R); } +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A, + __m128d __B, const int __R) +{ + return 
(__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A, + (__v2df) __B, + (__v4sf) __W, + __U, + __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A, + __m128d __B, const int __R) +{ + return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A, + (__v2df) __B, + _mm_setzero_ps (), + __U, + __R); +} + extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) @@ -8629,6 +8677,30 @@ _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) (__v4sf) __B, __R); } + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A, + __m128 __B, const int __R) +{ + return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A, + (__v4sf) __B, + (__v2df) __W, + __U, + __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A, + __m128 __B, const int __R) +{ + return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A, + (__v4sf) __B, + _mm_setzero_pd (), + __U, + __R); +} #else #define _mm512_cvt_roundpd_ps(A, B) \ (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B) @@ -8642,10 +8714,37 @@ _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) #define _mm_cvt_roundsd_ss(A, B, C) \ (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C) +#define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \ + (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C)) + +#define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \ + (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_setzero_ps (), \ + (U), (C)) + #define _mm_cvt_roundss_sd(A, B, C) \ (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C) + +#define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \ + (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C)) + +#define _mm_maskz_cvt_roundss_sd(U, A, B, C) \ + (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_setzero_pd (), \ + (U), (C)) + #endif +#define _mm_mask_cvtss_sd(W, U, A, B) \ + _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_cvtss_sd(U, A, B) \ + _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_mask_cvtsd_ss(W, U, A, B) \ + _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION) + +#define _mm_maskz_cvtsd_ss(U, A, B) \ + _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION) + extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_stream_si512 (__m512i * __P, __m512i __A) @@ -14265,6 +14364,14 @@ _mm_cvttss_i64 (__m128 __A) } #endif /* __x86_64__ */ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsi512_si32 (__m512i __A) +{ + __v16si __B = (__v16si) __A; + return __B[0]; +} + extern __inline unsigned __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_u32 (__m128 __A) @@ -14289,6 +14396,34 @@ _mm_cvttss_i32 (__m128 __A) _MM_FROUND_CUR_DIRECTION); } +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_i32 (__m128d __A) +{ + return (int) __builtin_ia32_cvtsd2si ((__v2df) __A); +} + +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_i32 (__m128 
__A) +{ + return (int) __builtin_ia32_cvtss2si ((__v4sf) __A); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti32_sd (__m128d __A, int __B) +{ + return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti32_ss (__m128 __A, int __B) +{ + return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B); +} + #ifdef __x86_64__ extern __inline unsigned long long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -14315,6 +14450,34 @@ _mm_cvttsd_i64 (__m128d __A) return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, _MM_FROUND_CUR_DIRECTION); } + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_i64 (__m128d __A) +{ + return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_i64 (__m128 __A) +{ + return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti64_sd (__m128d __A, long long __B) +{ + return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvti64_ss (__m128 __A, long long __B) +{ + return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B); +} #endif /* __x86_64__ */ extern __inline unsigned diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index cd4275e..b4b1d7f 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -34,6 +34,15 @@ #define __DISABLE_AVX512VLBW__ #endif /* __AVX512VLBW__ */ +/* Internal data types for implementing the intrinsics. 
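The unsuffixed scalar conversions added above (_mm_cvtsd_i32, _mm_cvtss_i32, _mm_cvti32_sd, _mm_cvti32_ss and their 64-bit counterparts) convert using the current rounding mode. A minimal usage sketch, not part of the commit, assuming -mavx512f since they live in avx512fintrin.h; the function names below are only for illustration.

/* Sketch: uses only the intrinsics defined above plus standard SSE2 types.  */
#include <immintrin.h>

int
sd_to_int (__m128d x)
{
  return _mm_cvtsd_i32 (x);          /* cvtsd2si, current rounding mode */
}

__m128d
int_to_sd (__m128d upper, int i)
{
  return _mm_cvti32_sd (upper, i);   /* cvtsi2sd into the low element */
}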
*/ +typedef short __v16hi_u __attribute__ ((__vector_size__ (32), \ + __may_alias__, __aligned__ (1))); +typedef short __v8hi_u __attribute__ ((__vector_size__ (16), \ + __may_alias__, __aligned__ (1))); +typedef char __v32qi_u __attribute__ ((__vector_size__ (32), \ + __may_alias__, __aligned__ (1))); +typedef char __v16qi_u __attribute__ ((__vector_size__ (16), \ + __may_alias__, __aligned__ (1))); extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -75,6 +84,13 @@ _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_storeu_epi8 (void *__P, __m256i __A) +{ + *(__v32qi_u *) __P = (__v32qi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) { __builtin_ia32_storedquqi256_mask ((char *) __P, @@ -84,6 +100,13 @@ _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi8 (void *__P, __m128i __A) +{ + *(__v16qi_u *) __P = (__v16qi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { __builtin_ia32_storedquqi128_mask ((char *) __P, @@ -93,6 +116,13 @@ _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadu_epi16 (void const *__P) +{ + return (__m256i) (*(__v16hi_u *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((const short *) __P, @@ -112,6 +142,13 @@ _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_epi16 (void const *__P) +{ + return (__m128i) (*(__v8hi_u *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((const short *) __P, @@ -170,6 +207,13 @@ _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadu_epi8 (void const *__P) +{ + return (__m256i) (*(__v32qi_u *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((const char *) __P, @@ -189,6 +233,13 @@ _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_epi8 (void const *__P) +{ + return (__m128i) (*(__v16qi_u *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((const char *) __P, @@ -3710,6 +3761,13 @@ _mm256_cmple_epu16_mask (__m256i __X, __m256i __Y) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
+_mm256_storeu_epi16 (void *__P, __m256i __A) +{ + *(__v16hi_u *) __P = (__v16hi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { __builtin_ia32_storedquhi256_mask ((short *) __P, @@ -3719,6 +3777,13 @@ _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_epi16 (void *__P, __m128i __A) +{ + *(__v8hi_u *) __P = (__v8hi_u) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedquhi128_mask ((short *) __P, diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 7abd601..99666c7 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -36,6 +36,14 @@ /* Internal data types for implementing the intrinsics. */ typedef unsigned int __mmask32; +typedef int __v4si_u __attribute__ ((__vector_size__ (16), \ + __may_alias__, __aligned__ (1))); +typedef int __v8si_u __attribute__ ((__vector_size__ (32), \ + __may_alias__, __aligned__ (1))); +typedef long long __v2di_u __attribute__ ((__vector_size__ (16), \ + __may_alias__, __aligned__ (1))); +typedef long long __v4di_u __attribute__ ((__vector_size__ (32), \ + __may_alias__, __aligned__ (1))); extern __inline __m256d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) @@ -265,6 +273,13 @@ _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_load_epi64 (void const *__P) +{ + return (__m256i) (*(__v4di *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -286,6 +301,13 @@ _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_epi64 (void const *__P) +{ + return (__m128i) (*(__v2di *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -363,6 +385,13 @@ _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_load_epi32 (void const *__P) +{ + return (__m256i) (*(__v8si *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -384,6 +413,13 @@ _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_epi32 (void const *__P) +{ + return (__m128i) (*(__v4si *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, @@ -405,6 +441,13 @@ _mm_maskz_load_epi32 (__mmask8 __U, void const 
*__P) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_store_epi32 (void *__P, __m256i __A) +{ + *(__v8si *) __P = (__v8si) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, @@ -414,6 +457,13 @@ _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) extern __inline void __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_epi32 (void *__P, __m128i __A) +{ + *(__v4si *) __P = (__v4si) __A; +} + +extern __inline void +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, @@ -719,6 +769,13 @@ _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadu_epi64 (void const *__P) +{ + return (__m256i) (*(__v4di_u *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((const long long *) __P, @@ -738,6 +795,13 @@ _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_epi64 (void const *__P) +{ + return (__m128i) (*(__v2di_u *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((const long long *) __P, @@ -789,6 +853,13 @@ _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_loadu_epi32 (void const *__P) +{ + return (__m256i) (*(__v8si_u *) __P); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((const int *) __P, @@ -808,6 +879,13 @@ _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_epi32 (void const *__P) +{ + return (__m128i) (*(__v4si_u *) __P); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((const int *) __P, @@ -13730,6 +13808,13 @@ _mm256_permutex_pd (__m256d __X, const int __M) #endif #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps ((B), (A)) +#define _mm256_mask_cvt_roundps_ph(A, B, C, D) \ + _mm256_mask_cvtps_ph ((A), (B), (C), (D)) +#define _mm256_maskz_cvt_roundps_ph(A, B, C) \ + _mm256_maskz_cvtps_ph ((A), (B), (C)) +#define _mm_mask_cvt_roundps_ph(A, B, C, D) \ + _mm_mask_cvtps_ph ((A), (B), (C), (D)) +#define _mm_maskz_cvt_roundps_ph(A, B, C) _mm_maskz_cvtps_ph ((A), (B), (C)) #ifdef __DISABLE_AVX512VL__ #undef __DISABLE_AVX512VL__ diff --git a/gcc/config/i386/avx512vp2intersectintrin.h b/gcc/config/i386/avx512vp2intersectintrin.h index 60cb52c..f368d83 100644 --- 
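The unmasked load/store intrinsics added above compile to plain vector moves rather than masked builtins. A small sketch, not part of the commit, assuming -mavx512vl (the epi8/epi16 forms earlier also need -mavx512bw); the function name is illustrative.

/* Sketch: _mm_loadu_epi32 tolerates unaligned pointers, while
   _mm_store_epi32 expects dst to be 16-byte aligned.  */
#include <immintrin.h>

void
add_one (int *dst, const int *src)
{
  __m128i v = _mm_loadu_epi32 (src);
  v = _mm_add_epi32 (v, _mm_set1_epi32 (1));
  _mm_store_epi32 (dst, v);
}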
a/gcc/config/i386/avx512vp2intersectintrin.h +++ b/gcc/config/i386/avx512vp2intersectintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead." #endif diff --git a/gcc/config/i386/avx512vp2intersectvlintrin.h b/gcc/config/i386/avx512vp2intersectvlintrin.h index 26eee36..f657840 100644 --- a/gcc/config/i386/avx512vp2intersectvlintrin.h +++ b/gcc/config/i386/avx512vp2intersectvlintrin.h @@ -1,3 +1,26 @@ +/* Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + #if !defined _IMMINTRIN_H_INCLUDED #error "Never use <avx512vp2intersectintrin.h> directly; include <immintrin.h> instead." #endif diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 22b2bae..fd5cf6a 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -444,6 +444,13 @@ _mm_cmp_ss (__m128 __X, __m128 __Y, const int __P) (__v4sf)(__m128)(Y), (int)(P))) #endif +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_cvtsi256_si32 (__m256i __A) +{ + __v8si __B = (__v8si) __A; + return __B[0]; +} + extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi32_pd (__m128i __A) { diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h index c5de9eb..9fdd08c 100644 --- a/gcc/config/i386/bmi2intrin.h +++ b/gcc/config/i386/bmi2intrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
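_mm256_cvtsi256_si32 mirrors the long-standing _mm_cvtsi128_si32: it extracts element 0 of the vector as a scalar int, and the 512-bit _mm512_cvtsi512_si32 added earlier in this patch does the same for __m512i. A hedged sketch, not part of the commit, assuming -mavx; the function name is illustrative.

#include <immintrin.h>

int
low_lane (__m256i v)
{
  return _mm256_cvtsi256_si32 (v);   /* element 0 of the vector */
}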
*/ -#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED -# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <bmi2intrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _BMI2INTRIN_H_INCLUDED diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h index 8ba6e5b..5bd712a 100644 --- a/gcc/config/i386/bmiintrin.h +++ b/gcc/config/i386/bmiintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED -# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <bmiintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _BMIINTRIN_H_INCLUDED diff --git a/gcc/config/i386/cetintrin.h b/gcc/config/i386/cetintrin.h index 095bbe0..81c4d72 100644 --- a/gcc/config/i386/cetintrin.h +++ b/gcc/config/i386/cetintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <cetintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <cetintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _CETINTRIN_H_INCLUDED diff --git a/gcc/config/i386/cldemoteintrin.h b/gcc/config/i386/cldemoteintrin.h index 8c0feca..0c31c35 100644 --- a/gcc/config/i386/cldemoteintrin.h +++ b/gcc/config/i386/cldemoteintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <cldemoteintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <cldemoteintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _CLDEMOTE_H_INCLUDED diff --git a/gcc/config/i386/clflushoptintrin.h b/gcc/config/i386/clflushoptintrin.h index 037f044..a3697f0 100644 --- a/gcc/config/i386/clflushoptintrin.h +++ b/gcc/config/i386/clflushoptintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <clflushoptintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <clflushoptintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _CLFLUSHOPTINTRIN_H_INCLUDED diff --git a/gcc/config/i386/clwbintrin.h b/gcc/config/i386/clwbintrin.h index 84d0939..3f83962 100644 --- a/gcc/config/i386/clwbintrin.h +++ b/gcc/config/i386/clwbintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <clwbintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <clwbintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _CLWBINTRIN_H_INCLUDED diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index af37f5c..0b902d5 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -183,6 +183,10 @@ "@internal Memory operand without REX prefix." (match_operand 0 "norex_memory_operand")) +(define_special_memory_constraint "Br" + "@internal bcst memory operand." 
+ (match_operand 0 "bcst_mem_operand")) + (define_constraint "Bs" "@internal Sibcall memory operand." (ior (and (not (match_test "TARGET_INDIRECT_BRANCH_REGISTER")) diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index bca61d62..22d284e 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -26,6 +26,7 @@ /* %eax */ #define bit_AVX512BF16 (1 << 5) +#define bit_HRESET (1 << 22) /* %ecx */ #define bit_SSE3 (1 << 0) @@ -124,9 +125,13 @@ #define bit_AVX5124FMAPS (1 << 3) #define bit_AVX512VP2INTERSECT (1 << 8) #define bit_IBT (1 << 20) +#define bit_UINTR (1 << 5) #define bit_PCONFIG (1 << 18) #define bit_SERIALIZE (1 << 14) #define bit_TSXLDTRK (1 << 16) +#define bit_AMX_BF16 (1 << 22) +#define bit_AMX_TILE (1 << 24) +#define bit_AMX_INT8 (1 << 25) /* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */ #define bit_BNDREGS (1 << 3) diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 545d3bf..8ff240e 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -715,6 +715,19 @@ _mm_loadu_si64 (void const *__P) return _mm_loadl_epi64 ((__m128i_u *)__P); } +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_si32 (void const *__P) +{ + return _mm_set_epi32 ((int)0, (int)0, (int)0, *(int *)__P); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_si16 (void const *__P) +{ + return _mm_set_epi16 ((short)0, (short)0, (short)0, (short)0, + (short)0, (short)0, (short)0, *(short *)__P); +} + extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_store_si128 (__m128i *__P, __m128i __B) { @@ -739,6 +752,18 @@ _mm_storeu_si64 (void *__P, __m128i __B) _mm_storel_epi64 ((__m128i_u *)__P, __B); } +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_si32 (void *__P, __m128i __B) +{ + *(__m32_u *)__P = (__m32) ((__v4si)__B)[0]; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_si16 (void *__P, __m128i __B) +{ + *(__m16_u *)__P = (__m16) ((__v8hi)__B)[0]; +} + extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_movepi64_pi64 (__m128i __B) { diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h index 4b2efcb..dcb6507 100644 --- a/gcc/config/i386/enqcmdintrin.h +++ b/gcc/config/i386/enqcmdintrin.h @@ -21,12 +21,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <enqcmdntrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <enqcmdintrin.h> directly; include <x86gprintrin.h> instead." #endif -#ifndef _ENQCMDNTRIN_H_INCLUDED -#define _ENQCMDNTRIN_H_INCLUDED +#ifndef _ENQCMDINTRIN_H_INCLUDED +#define _ENQCMDINTRIN_H_INCLUDED #ifndef __ENQCMD__ #pragma GCC push_options @@ -52,4 +52,4 @@ _enqcmds (void * __P, const void * __Q) #undef __DISABLE_ENQCMD__ #pragma GCC pop_options #endif /* __DISABLE_ENQCMD__ */ -#endif /* _ENQCMDNTRIN_H_INCLUDED. */ +#endif /* _ENQCMDINTRIN_H_INCLUDED. */ diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h index fde05a7..6e059df 100644 --- a/gcc/config/i386/fxsrintrin.h +++ b/gcc/config/i386/fxsrintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. 
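The new emmintrin.h helpers give an intrinsic spelling for narrow scalar loads and stores through an __m128i; plain SSE2 is sufficient. A usage sketch, not part of the commit, with illustrative names.

#include <emmintrin.h>

void
copy_one_int (void *dst, const void *src)
{
  __m128i v = _mm_loadu_si32 (src);   /* 32 bits into element 0, upper bits zeroed */
  _mm_storeu_si32 (dst, v);           /* write back only the low 32 bits */
}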
*/ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <fxsrintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <fxsrintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _FXSRINTRIN_H_INCLUDED diff --git a/gcc/config/i386/hresetintrin.h b/gcc/config/i386/hresetintrin.h new file mode 100644 index 0000000..bdbe253 --- /dev/null +++ b/gcc/config/i386/hresetintrin.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#if !defined _X86GPRINTRIN_H_INCLUDED +# error "Never use <hresetintrin.h> directly; include <x86gprintrin.h> instead." +#endif + +#ifndef _HRESETINTRIN_H_INCLUDED +#define _HRESETINTRIN_H_INCLUDED + +#ifndef __HRESET__ +#pragma GCC push_options +#pragma GCC target ("hreset") +#define __DISABLE_HRESET__ +#endif /* __HRESET__ */ + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_hreset (unsigned int __EAX) +{ + __builtin_ia32_hreset (__EAX); +} + +#ifdef __DISABLE_HRESET__ +#undef __DISABLE_HRESET__ +#pragma GCC pop_options +#endif /* __DISABLE_HRESET__ */ +#endif /* _HRESETINTRIN_H_INCLUDED. 
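A usage sketch for the new _hreset intrinsic, not part of the commit: it simply wraps __builtin_ia32_hreset, and the EAX operand selects which processor history state to reset (which bits are meaningful depends on what the platform enables). Requires -mhreset or the equivalent target attribute; the function name is illustrative.

#include <x86gprintrin.h>

__attribute__ ((target ("hreset")))
void
reset_history (unsigned int mask)
{
  _hreset (mask);   /* expands to __builtin_ia32_hreset (mask) */
}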
*/ diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 1adf7c4..964633d 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -194,6 +194,7 @@ DEF_FUNCTION_TYPE (UNSIGNED) DEF_FUNCTION_TYPE (UINT) DEF_FUNCTION_TYPE (USHORT) DEF_FUNCTION_TYPE (INT) +DEF_FUNCTION_TYPE (UINT8) DEF_FUNCTION_TYPE (VOID) DEF_FUNCTION_TYPE (PVOID) @@ -443,6 +444,7 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI) DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, QI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, UQI) +DEF_FUNCTION_TYPE (V8DF, V8DF, INT, V8DF, UQI, INT) DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, V8DI, INT) DEF_FUNCTION_TYPE (V4DF, V4DF, V4DF, V4DI, INT, UQI) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DI, INT, UQI) @@ -452,6 +454,7 @@ DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, HI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, UHI) +DEF_FUNCTION_TYPE (V16SF, V16SF, INT, V16SF, UHI, INT) DEF_FUNCTION_TYPE (V16SI, V16SI, V4SI, INT, V16SI, UHI) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, V16SI, INT, HI, INT) @@ -1026,8 +1029,10 @@ DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, UQI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, UQI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, QI, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, QI, INT) +DEF_FUNCTION_TYPE (V4SF, V4SF, V2DF, V4SF, UQI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, QI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, QI, INT) +DEF_FUNCTION_TYPE (V2DF, V2DF, V4SF, V2DF, UQI, INT) DEF_FUNCTION_TYPE (V2DF, V2DF, V2DF, V2DF, INT) DEF_FUNCTION_TYPE (V4SF, V4SF, V4SF, V4SF, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index fec5cef..882cba5 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -452,6 +452,14 @@ BDESC (0, OPTION_MASK_ISA2_SERIALIZE, CODE_FOR_serialize, "__builtin_ia32_serial BDESC (0, OPTION_MASK_ISA2_TSXLDTRK, CODE_FOR_xsusldtrk, "__builtin_ia32_xsusldtrk", IX86_BUILTIN_XSUSLDTRK, UNKNOWN, (int) VOID_FTYPE_VOID) BDESC (0, OPTION_MASK_ISA2_TSXLDTRK, CODE_FOR_xresldtrk, "__builtin_ia32_xresldtrk", IX86_BUILTIN_XRESLDTRK, UNKNOWN, (int) VOID_FTYPE_VOID) +/* UINTR. */ +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_clui, "__builtin_ia32_clui", IX86_BUILTIN_CLUI, UNKNOWN, (int) VOID_FTYPE_VOID) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_stui, "__builtin_ia32_stui", IX86_BUILTIN_STUI, UNKNOWN, (int) VOID_FTYPE_VOID) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_senduipi, "__builtin_ia32_senduipi", IX86_BUILTIN_SENDUIPI, UNKNOWN, (int) VOID_FTYPE_UINT64) + +/* HRESET */ +BDESC (0, OPTION_MASK_ISA2_HRESET, CODE_FOR_hreset, "__builtin_ia32_hreset", IX86_BUILTIN_HRESET, UNKNOWN, (int) VOID_FTYPE_UNSIGNED) + BDESC_END (SPECIAL_ARGS, ARGS) /* Builtins with variable number of arguments. 
*/ @@ -2772,10 +2780,12 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_r BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtsd2ss_mask_round, "__builtin_ia32_cvtsd2ss_mask_round", IX86_BUILTIN_CVTSD2SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) @@ -2911,13 +2921,21 @@ BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_exp2v16sf_mask_round, "__b BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v2df_mask_round, "__builtin_ia32_rcp28sd_mask_round", IX86_BUILTIN_RCP28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrcp28v4sf_mask_round, "__builtin_ia32_rcp28ss_mask_round", IX86_BUILTIN_RCP28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, 
CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) +BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v2df_mask_round, "__builtin_ia32_rsqrt28sd_mask_round", IX86_BUILTIN_RSQRT28SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_AVX512ER, 0, CODE_FOR_avx512er_vmrsqrt28v4sf_mask_round, "__builtin_ia32_rsqrt28ss_mask_round", IX86_BUILTIN_RSQRT28SS_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) /* AVX512DQ. */ +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv8df_mask_round, "__builtin_ia32_reducepd512_mask_round", IX86_BUILTIN_REDUCEPD512_MASK_ROUND, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducepv16sf_mask_round, "__builtin_ia32_reduceps512_mask_round", IX86_BUILTIN_REDUCEPS512_MASK_ROUND, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv2df_mask_round, "__builtin_ia32_reducesd_mask_round", IX86_BUILTIN_REDUCESD128_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_reducesv4sf_mask_round, "__builtin_ia32_reducess_mask_round", IX86_BUILTIN_REDUCESS128_MASK_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv2df_mask_round, "__builtin_ia32_rangesd128_mask_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_avx512dq_rangesv4sf_mask_round, "__builtin_ia32_rangess128_mask_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512DQ, 0, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT) diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index ca7a870..504987a 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -1194,6 +1194,11 @@ ix86_init_mmx_sse_builtins (void) def_builtin (0, OPTION_MASK_ISA2_WAITPKG, "__builtin_ia32_tpause", UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); + /* UINTR. */ + def_builtin (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, + "__builtin_ia32_testui", + UINT8_FTYPE_VOID, IX86_BUILTIN_TESTUI); + /* CLDEMOTE. 
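The UINTR builtins registered above map one-to-one onto the new instructions: STUI and CLUI set and clear the user-interrupt flag, TESTUI reads it back through the carry flag, and SENDUIPI takes a 64-bit UITT index. A hedged sketch, not part of the commit, valid only for x86-64 with -muintr or the target attribute; the function name and index parameter are illustrative.

__attribute__ ((target ("uintr")))
void
poke_peer (unsigned long long uitt_index)
{
  __builtin_ia32_stui ();                 /* allow user interrupts (UIF = 1) */
  __builtin_ia32_senduipi (uitt_index);   /* send a user IPI via that UITT entry */
  if (__builtin_ia32_testui ())           /* returns the current UIF value */
    __builtin_ia32_clui ();               /* mask user interrupts again */
}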
*/ def_builtin (0, OPTION_MASK_ISA2_CLDEMOTE, "__builtin_ia32_cldemote", VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); diff --git a/gcc/config/i386/i386-builtins.h b/gcc/config/i386/i386-builtins.h index cc6a8ce..a88cc0c 100644 --- a/gcc/config/i386/i386-builtins.h +++ b/gcc/config/i386/i386-builtins.h @@ -40,6 +40,7 @@ enum ix86_builtins IX86_BUILTIN_UMONITOR, IX86_BUILTIN_UMWAIT, IX86_BUILTIN_TPAUSE, + IX86_BUILTIN_TESTUI, IX86_BUILTIN_CLZERO, IX86_BUILTIN_CLDEMOTE, IX86_BUILTIN_VEC_INIT_V2SI, diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 3553a37..bbe9ac5 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -588,6 +588,20 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__ENQCMD__"); if (isa_flag2 & OPTION_MASK_ISA2_TSXLDTRK) def_or_undef (parse_in, "__TSXLDTRK__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_TILE) + def_or_undef (parse_in, "__AMX_TILE__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_INT8) + def_or_undef (parse_in, "__AMX_INT8__"); + if (isa_flag2 & OPTION_MASK_ISA2_AMX_BF16) + def_or_undef (parse_in, "__AMX_BF16__"); + if (isa_flag & OPTION_MASK_ISA_SAHF) + def_or_undef (parse_in, "__LAHF_SAHF__"); + if (isa_flag2 & OPTION_MASK_ISA2_MOVBE) + def_or_undef (parse_in, "__MOVBE__"); + if (isa_flag2 & OPTION_MASK_ISA2_UINTR) + def_or_undef (parse_in, "__UINTR__"); + if (isa_flag2 & OPTION_MASK_ISA2_HRESET) + def_or_undef (parse_in, "__HRESET__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index e6f8b31..3e8afe6 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -1045,7 +1045,8 @@ ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, rtx src2 = operands[2]; /* Both source operands cannot be in memory. */ - if (MEM_P (src1) && MEM_P (src2)) + if ((MEM_P (src1) || bcst_mem_operand (src1, mode)) + && (MEM_P (src2) || bcst_mem_operand (src2, mode))) return false; /* Canonicalize operand order for commutative operators. */ @@ -3525,6 +3526,13 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) machine_mode mode = GET_MODE (dest); machine_mode cmpmode = GET_MODE (cmp); + /* Simplify trivial VEC_COND_EXPR to avoid ICE in pr97506. */ + if (rtx_equal_p (op_true, op_false)) + { + emit_move_insn (dest, op_true); + return; + } + /* In AVX512F the result of comparison is an integer mask. 
*/ bool maskcmp = mode != cmpmode && ix86_valid_mask_cmp_mode (mode); @@ -10225,12 +10233,16 @@ ix86_expand_round_builtin (const struct builtin_description *d, case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: + case V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT: case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: + case V4SF_FTYPE_V4SF_V2DF_V4SF_UQI_INT: nargs = 5; break; case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: + case V8DF_FTYPE_V8DF_INT_V8DF_UQI_INT: + case V16SF_FTYPE_V16SF_INT_V16SF_UHI_INT: nargs_constant = 4; nargs = 5; break; @@ -10413,6 +10425,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case USHORT_FTYPE_VOID: case UINT64_FTYPE_VOID: case UINT_FTYPE_VOID: + case UINT8_FTYPE_VOID: case UNSIGNED_FTYPE_VOID: nargs = 0; klass = load; @@ -11203,6 +11216,19 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, return target; + case IX86_BUILTIN_TESTUI: + emit_insn (gen_testui ()); + + if (target == 0 + || !register_operand (target, QImode)) + target = gen_reg_rtx (QImode); + + pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), + const0_rtx); + emit_insn (gen_rtx_SET (target, pat)); + + return target; + case IX86_BUILTIN_CLZERO: arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); @@ -12807,6 +12833,14 @@ rdseed_step: emit_insn (gen_incssp (mode, op0)); return 0; + case IX86_BUILTIN_HRESET: + icode = CODE_FOR_hreset; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + op0 = force_reg (SImode, op0); + emit_insn (gen_hreset (op0)); + return 0; + case IX86_BUILTIN_RSTORSSP: case IX86_BUILTIN_CLRSSBSY: arg0 = CALL_EXPR_ARG (exp, 0); diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 2fabd20..82c8091 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -209,7 +209,12 @@ static struct ix86_target_opts isa2_opts[] = { "-mavx512bf16", OPTION_MASK_ISA2_AVX512BF16 }, { "-menqcmd", OPTION_MASK_ISA2_ENQCMD }, { "-mserialize", OPTION_MASK_ISA2_SERIALIZE }, - { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK } + { "-mtsxldtrk", OPTION_MASK_ISA2_TSXLDTRK }, + { "-mamx-tile", OPTION_MASK_ISA2_AMX_TILE }, + { "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 }, + { "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 }, + { "-muintr", OPTION_MASK_ISA2_UINTR }, + { "-mhreset", OPTION_MASK_ISA2_HRESET } }; static struct ix86_target_opts isa_opts[] = { @@ -1028,11 +1033,16 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), IX86_ATTR_ISA ("cldemote", OPT_mcldemote), + IX86_ATTR_ISA ("uintr", OPT_muintr), IX86_ATTR_ISA ("ptwrite", OPT_mptwrite), IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16), IX86_ATTR_ISA ("enqcmd", OPT_menqcmd), IX86_ATTR_ISA ("serialize", OPT_mserialize), IX86_ATTR_ISA ("tsxldtrk", OPT_mtsxldtrk), + IX86_ATTR_ISA ("amx-tile", OPT_mamx_tile), + IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8), + IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), + IX86_ATTR_ISA ("hreset", OPT_mhreset), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -1893,6 +1903,9 @@ ix86_option_override_internal (bool main_args_p, opts->x_ix86_stringop_alg = no_stringop; } + if (TARGET_UINTR && !TARGET_64BIT) + error ("%<-muintr%> not supported for 32-bit code"); + if (!opts->x_ix86_arch_string) opts->x_ix86_arch_string = TARGET_64BIT_P (opts->x_ix86_isa_flags) @@ -2052,10 +2065,27 @@ 
ix86_option_override_internal (bool main_args_p, return false; } + /* The feature-only micro-architecture levels that use + PTA_NO_TUNE are only defined for the x86-64 psABI. */ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) != 0 + && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || opts->x_ix86_abi != SYSV_ABI)) + { + error (G_("%<%s%> architecture level is only defined" + " for the x86-64 psABI"), opts->x_ix86_arch_string); + return false; + } + ix86_schedule = processor_alias_table[i].schedule; ix86_arch = processor_alias_table[i].processor; - /* Default cpu tuning to the architecture. */ - ix86_tune = ix86_arch; + + /* Default cpu tuning to the architecture, unless the table + entry requests not to do this. Used by the x86-64 psABI + micro-architecture levels. */ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) == 0) + ix86_tune = ix86_arch; + else + ix86_tune = PROCESSOR_GENERIC; if (((processor_alias_table[i].flags & PTA_MMX) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) @@ -2258,6 +2288,18 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AVX512BF16)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVX512BF16; + if (((processor_alias_table[i].flags & PTA_AMX_TILE) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_TILE)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_TILE; + if (((processor_alias_table[i].flags & PTA_AMX_INT8) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_INT8)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_INT8; + if (((processor_alias_table[i].flags & PTA_AMX_BF16) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AMX_BF16)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16; if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI; @@ -2366,7 +2408,8 @@ ix86_option_override_internal (bool main_args_p, ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); for (i = 0; i < pta_size; i++) - if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) + if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) { ix86_schedule = processor_alias_table[i].schedule; ix86_tune = processor_alias_table[i].processor; @@ -2410,8 +2453,9 @@ ix86_option_override_internal (bool main_args_p, auto_vec <const char *> candidates; for (i = 0; i < pta_size; i++) - if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) - || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + if ((!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) candidates.safe_push (processor_alias_table[i].name); #ifdef HAVE_LOCAL_CPU_DETECT diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c890a73..502d240 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1484,7 +1484,7 @@ ix86_reg_parm_stack_space (const_tree fndecl) bool ix86_libc_has_function (enum function_class fn_class) { - return targetm.libc_has_function (fn_class); + return targetm.libc_has_function (fn_class, NULL_TREE); } /* Returns value SYSV_ABI, MS_ABI dependent on fntype, @@ -13098,6 +13098,43 @@ ix86_print_operand (FILE *file, rtx x, int code) fputs (dstr, file); } + /* Print bcst_mem_operand. 
*/ + else if (GET_CODE (x) == VEC_DUPLICATE) + { + machine_mode vmode = GET_MODE (x); + /* Must be bcst_memory_operand. */ + gcc_assert (bcst_mem_operand (x, vmode)); + + rtx mem = XEXP (x,0); + ix86_print_operand (file, mem, 0); + + switch (vmode) + { + case E_V2DImode: + case E_V2DFmode: + fputs ("{1to2}", file); + break; + case E_V4SImode: + case E_V4SFmode: + case E_V4DImode: + case E_V4DFmode: + fputs ("{1to4}", file); + break; + case E_V8SImode: + case E_V8SFmode: + case E_V8DFmode: + case E_V8DImode: + fputs ("{1to8}", file); + break; + case E_V16SFmode: + case E_V16SImode: + fputs ("{1to16}", file); + break; + default: + gcc_unreachable (); + } + } + else { /* We have patterns that allow zero sets of memory, for instance. @@ -15131,11 +15168,32 @@ ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) /* Codes needing carry flag. */ case GEU: /* CF=0 */ case LTU: /* CF=1 */ + rtx geu; /* Detect overflow checks. They need just the carry flag. */ if (GET_CODE (op0) == PLUS && (rtx_equal_p (op1, XEXP (op0, 0)) || rtx_equal_p (op1, XEXP (op0, 1)))) return CCCmode; + /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns. + Match LTU of op0 + (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) + and op1 + (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)) + where CC_CCC is either CC or CCC. */ + else if (code == LTU + && GET_CODE (op0) == NEG + && GET_CODE (geu = XEXP (op0, 0)) == GEU + && REG_P (XEXP (geu, 0)) + && (GET_MODE (XEXP (geu, 0)) == CCCmode + || GET_MODE (XEXP (geu, 0)) == CCmode) + && REGNO (XEXP (geu, 0)) == FLAGS_REG + && XEXP (geu, 1) == const0_rtx + && GET_CODE (op1) == LTU + && REG_P (XEXP (op1, 0)) + && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) + && REGNO (XEXP (op1, 0)) == FLAGS_REG + && XEXP (op1, 1) == const0_rtx) + return CCCmode; else return CCmode; case GTU: /* CF=0 & ZF=0 */ @@ -19749,33 +19807,56 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, return false; case COMPARE: - if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT - && XEXP (XEXP (x, 0), 1) == const1_rtx - && CONST_INT_P (XEXP (XEXP (x, 0), 2)) - && XEXP (x, 1) == const0_rtx) + rtx op0, op1; + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + if (GET_CODE (op0) == ZERO_EXTRACT + && XEXP (op0, 1) == const1_rtx + && CONST_INT_P (XEXP (op0, 2)) + && op1 == const0_rtx) { /* This kind of construct is implemented using test[bwl]. Treat it as if we had an AND. */ - mode = GET_MODE (XEXP (XEXP (x, 0), 0)); + mode = GET_MODE (XEXP (op0, 0)); *total = (cost->add - + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, + + rtx_cost (XEXP (op0, 0), mode, outer_code, opno, speed) + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); return true; } - if (GET_CODE (XEXP (x, 0)) == PLUS - && rtx_equal_p (XEXP (XEXP (x, 0), 0), XEXP (x, 1))) + if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1)) { /* This is an overflow detection, count it as a normal compare. 
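The carry-flag cases above (CCCmode for GEU/LTU and the new zero-cost COMPARE match) target the usual add-with-carry idiom. A small C sketch of the kind of source that produces it, not part of the commit; names are illustrative.

/* Double-word addition: the (l < a_lo) carry feeds the high half, which is
   what the setcc + add-overflow patterns recognize.  */
unsigned long long
add_128_hi (unsigned long long a_lo, unsigned long long a_hi,
            unsigned long long b_lo, unsigned long long b_hi,
            unsigned long long *lo)
{
  unsigned long long l = a_lo + b_lo;
  unsigned long long h = a_hi + b_hi + (l < a_lo);
  *lo = l;
  return h;
}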
*/ - *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)), - COMPARE, 0, speed); + *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed); + return true; + } + + rtx geu; + /* Match x + (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) + (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) */ + if (mode == CCCmode + && GET_CODE (op0) == NEG + && GET_CODE (geu = XEXP (op0, 0)) == GEU + && REG_P (XEXP (geu, 0)) + && (GET_MODE (XEXP (geu, 0)) == CCCmode + || GET_MODE (XEXP (geu, 0)) == CCmode) + && REGNO (XEXP (geu, 0)) == FLAGS_REG + && XEXP (geu, 1) == const0_rtx + && GET_CODE (op1) == LTU + && REG_P (XEXP (op1, 0)) + && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0)) + && REGNO (XEXP (op1, 0)) == FLAGS_REG + && XEXP (op1, 1) == const0_rtx) + { + /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop. */ + *total = 0; return true; } /* The embedded comparison operand is completely free. */ - if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) - && XEXP (x, 1) == const0_rtx) + if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx) *total = 0; return false; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 92b7475..24207d0 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -203,6 +203,16 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_SERIALIZE_P(x) TARGET_ISA2_SERIALIZE_P(x) #define TARGET_TSXLDTRK TARGET_ISA2_TSXLDTRK #define TARGET_TSXLDTRK_P(x) TARGET_ISA2_TSXLDTRK_P(x) +#define TARGET_AMX_TILE TARGET_ISA2_AMX_TILE +#define TARGET_AMX_TILE_P(x) TARGET_ISA2_AMX_TILE(x) +#define TARGET_AMX_INT8 TARGET_ISA2_AMX_INT8 +#define TARGET_AMX_INT8_P(x) TARGET_ISA2_AMX_INT8(x) +#define TARGET_AMX_BF16 TARGET_ISA2_AMX_BF16 +#define TARGET_AMX_BF16_P(x) TARGET_ISA2_AMX_BF16(x) +#define TARGET_UINTR TARGET_ISA2_UINTR +#define TARGET_UINTR_P(x) TARGET_ISA2_UINTR_P(x) +#define TARGET_HRESET TARGET_ISA2_HRESET +#define TARGET_HRESET_P(x) TARGET_ISA2_HRESET_P(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) @@ -1262,6 +1272,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); (TARGET_FMA4 && ((MODE) == V4SFmode || (MODE) == V2DFmode \ || (MODE) == V8SFmode || (MODE) == V4DFmode)) +#define VALID_BCST_MODE_P(MODE) \ + ((MODE) == SFmode || (MODE) == DFmode \ + || (MODE) == SImode || (MODE) == DImode) + /* It is possible to write patterns to move flags; but until someone does it, */ #define AVOID_CCMODE_COPIES @@ -2427,7 +2441,7 @@ const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40); const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); -/* Hole after PTA_MPX was removed. 
*/ +const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); @@ -2466,6 +2480,21 @@ const wide_int_bitmask PTA_ENQCMD (0, HOST_WIDE_INT_1U << 15); const wide_int_bitmask PTA_CLDEMOTE (0, HOST_WIDE_INT_1U << 16); const wide_int_bitmask PTA_SERIALIZE (0, HOST_WIDE_INT_1U << 17); const wide_int_bitmask PTA_TSXLDTRK (0, HOST_WIDE_INT_1U << 18); +const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19); +const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20); +const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21); +const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22); +const wide_int_bitmask PTA_HRESET(0, HOST_WIDE_INT_1U << 23); + +const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; +const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) + | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; +const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT + | PTA_MOVBE | PTA_XSAVE; +const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 + | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL; const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; @@ -2499,9 +2528,10 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT; const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE - | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK; + | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE + | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR; const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE - | PTA_WAITPKG | PTA_SERIALIZE; + | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET; const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD; const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 93aae81..8730816 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -186,6 +186,10 @@ ;; IRET support UNSPEC_INTERRUPT_RETURN + + ;; For MOVDIRI and MOVDIR64B support + UNSPEC_MOVDIRI + UNSPEC_MOVDIR64B ]) (define_c_enum "unspecv" [ @@ -280,10 +284,6 @@ UNSPECV_SETSSBSY UNSPECV_CLRSSBSY - ;; For MOVDIRI and MOVDIR64B support - UNSPECV_MOVDIRI - UNSPECV_MOVDIR64B - ;; For TSXLDTRK support UNSPECV_XSUSLDTRK UNSPECV_XRESLDTRK @@ -293,6 +293,12 @@ UNSPECV_UMONITOR UNSPECV_TPAUSE + ;; For UINTR support + UNSPECV_CLUI + UNSPECV_STUI + UNSPECV_TESTUI + UNSPECV_SENDUIPI + ;; For CLDEMOTE support UNSPECV_CLDEMOTE @@ -310,6 +316,9 @@ ;; For patchable area support UNSPECV_PATCHABLE_AREA + + ;; For HRESET support + UNSPECV_HRESET ]) ;; Constants to represent rounding modes in the ROUND instruction @@ -7039,6 +7048,20 @@ (set (match_operand:SWI48 0 "register_operand") (minus:SWI48 (match_dup 1) (match_dup 2)))])] "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)") + +(define_mode_iterator CC_CCC [CC CCC]) + +;; Pre-reload splitter to optimize +;; *setcc_qi followed by *addqi3_cconly_overflow_1 with the same QI +;; 
operand and no intervening flags modifications into nothing. +(define_insn_and_split "*setcc_qi_addqi3_cconly_overflow_1_<mode>" + [(set (reg:CCC FLAGS_REG) + (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))) + (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))))] + "ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)]) ;; Overflow setting add instructions @@ -13666,7 +13689,7 @@ (unspec [(const_int 0)] UNSPEC_INTERRUPT_RETURN)] "reload_completed" { - return TARGET_64BIT ? "iretq" : "iret"; + return TARGET_64BIT ? (TARGET_UINTR ? "uiret" : "iretq") : "iret"; }) ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET @@ -21531,17 +21554,17 @@ ;; MOVDIRI and MOVDIR64B (define_insn "movdiri<mode>" - [(unspec_volatile:SWI48 [(match_operand:SWI48 0 "memory_operand" "m") - (match_operand:SWI48 1 "register_operand" "r")] - UNSPECV_MOVDIRI)] + [(set (match_operand:SWI48 0 "memory_operand" "=m") + (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")] + UNSPEC_MOVDIRI))] "TARGET_MOVDIRI" "movdiri\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) (define_insn "@movdir64b_<mode>" - [(unspec_volatile:XI [(match_operand:P 0 "register_operand" "r") - (match_operand:XI 1 "memory_operand")] - UNSPECV_MOVDIR64B)] + [(set (mem:XI (match_operand:P 0 "register_operand" "r")) + (unspec:XI [(match_operand:XI 1 "memory_operand" "m")] + UNSPEC_MOVDIR64B))] "TARGET_MOVDIR64B" "movdir64b\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) @@ -21571,6 +21594,34 @@ "enqcmd<enqcmd_sfx>\t{%1, %0|%0, %1}" [(set_attr "type" "other")]) +;; UINTR +(define_int_iterator UINTR [UNSPECV_CLUI UNSPECV_STUI]) +(define_int_attr uintr [(UNSPECV_CLUI "clui") (UNSPECV_STUI "stui")]) + +(define_insn "<uintr>" + [(unspec_volatile [(const_int 0)] UINTR)] + "TARGET_UINTR && TARGET_64BIT" + "<uintr>" + [(set_attr "type" "other") + (set_attr "length" "4")]) + +(define_insn "testui" + [(set (reg:CCC FLAGS_REG) + (unspec_volatile:CCC [(const_int 0)] UNSPECV_TESTUI))] + "TARGET_UINTR && TARGET_64BIT" + "testui" + [(set_attr "type" "other") + (set_attr "length" "4")]) + +(define_insn "senduipi" + [(unspec_volatile + [(match_operand:DI 0 "register_operand" "r")] + UNSPECV_SENDUIPI)] + "TARGET_UINTR && TARGET_64BIT" + "senduipi\t%0" + [(set_attr "type" "other") + (set_attr "length" "4")]) + ;; WAITPKG (define_insn "umwait" @@ -21655,6 +21706,14 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +(define_insn "hreset" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")] + UNSPECV_HRESET)] + "TARGET_HRESET" + "hreset\t{$0|0}" + [(set_attr "type" "other") + (set_attr "length" "4")]) + (include "mmx.md") (include "sse.md") (include "sync.md") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index c9f7195..e6b1695 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -788,6 +788,10 @@ mptwrite Target Report Mask(ISA2_PTWRITE) Var(ix86_isa_flags2) Save Support PTWRITE built-in functions and code generation. +muintr +Target Report Mask(ISA2_UINTR) Var(ix86_isa_flags2) Save +Support UINTR built-in functions and code generation. + msgx Target Report Mask(ISA2_SGX) Var(ix86_isa_flags2) Save Support SGX built-in functions and code generation. @@ -1114,4 +1118,20 @@ Support SERIALIZE built-in functions and code generation. mtsxldtrk Target Report Mask(ISA2_TSXLDTRK) Var(ix86_isa_flags2) Save -Support TSXLDTRK built-in functions and code generation.
\ No newline at end of file +Support TSXLDTRK built-in functions and code generation. + +mamx-tile +Target Report Mask(ISA2_AMX_TILE) Var(ix86_isa_flags2) Save +Support AMX-TILE built-in functions and code generation. + +mamx-int8 +Target Report Mask(ISA2_AMX_INT8) Var(ix86_isa_flags2) Save +Support AMX-INT8 built-in functions and code generation. + +mamx-bf16 +Target Report Mask(ISA2_AMX_BF16) Var(ix86_isa_flags2) Save +Support AMX-BF16 built-in functions and code generation. + +mhreset +Target Report Mask(ISA2_HRESET) Var(ix86_isa_flags2) Save +Support HRESET built-in functions and code generation. diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h index fd29797..3568d1f 100644 --- a/gcc/config/i386/ia32intrin.h +++ b/gcc/config/i386/ia32intrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _X86INTRIN_H_INCLUDED -# error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <ia32intrin.h> directly; include <x86gprintrin.h> instead." #endif /* 32bit bsf */ diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index b660d0d..71eae83 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -24,6 +24,8 @@ #ifndef _IMMINTRIN_H_INCLUDED #define _IMMINTRIN_H_INCLUDED +#include <x86gprintrin.h> + #include <mmintrin.h> #include <xmmintrin.h> @@ -38,16 +40,6 @@ #include <wmmintrin.h> -#include <fxsrintrin.h> - -#include <xsaveintrin.h> - -#include <xsaveoptintrin.h> - -#include <xsavesintrin.h> - -#include <xsavecintrin.h> - #include <avxintrin.h> #include <avx2intrin.h> @@ -102,217 +94,28 @@ #include <shaintrin.h> -#include <lzcntintrin.h> - -#include <bmiintrin.h> - -#include <bmi2intrin.h> - #include <fmaintrin.h> #include <f16cintrin.h> #include <rtmintrin.h> -#include <xtestintrin.h> - -#include <cetintrin.h> - #include <gfniintrin.h> #include <vaesintrin.h> #include <vpclmulqdqintrin.h> -#include <movdirintrin.h> - -#include <sgxintrin.h> - -#include <pconfigintrin.h> - -#include <waitpkgintrin.h> - -#include <cldemoteintrin.h> - #include <avx512bf16vlintrin.h> #include <avx512bf16intrin.h> -#include <enqcmdintrin.h> +#include <amxtileintrin.h> -#include <serializeintrin.h> +#include <amxint8intrin.h> -#include <tsxldtrkintrin.h> - -#include <rdseedintrin.h> +#include <amxbf16intrin.h> #include <prfchwintrin.h> -#include <adxintrin.h> - -#include <clwbintrin.h> - -#include <clflushoptintrin.h> - -#include <wbnoinvdintrin.h> - -#include <pkuintrin.h> - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_wbinvd (void) -{ - __builtin_ia32_wbinvd (); -} - -#ifndef __RDRND__ -#pragma GCC push_options -#pragma GCC target("rdrnd") -#define __DISABLE_RDRND__ -#endif /* __RDRND__ */ -extern __inline int -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand16_step (unsigned short *__P) -{ - return __builtin_ia32_rdrand16_step (__P); -} - -extern __inline int -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand32_step (unsigned int *__P) -{ - return __builtin_ia32_rdrand32_step (__P); -} -#ifdef __DISABLE_RDRND__ -#undef __DISABLE_RDRND__ -#pragma GCC pop_options -#endif /* __DISABLE_RDRND__ */ - -#ifndef __RDPID__ -#pragma GCC push_options -#pragma GCC target("rdpid") -#define __DISABLE_RDPID__ -#endif /* __RDPID__ */ -extern __inline unsigned int -__attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) -_rdpid_u32 (void) -{ - return __builtin_ia32_rdpid (); -} -#ifdef __DISABLE_RDPID__ -#undef __DISABLE_RDPID__ -#pragma GCC pop_options -#endif /* __DISABLE_RDPID__ */ - -#ifdef __x86_64__ - -#ifndef __FSGSBASE__ -#pragma GCC push_options -#pragma GCC target("fsgsbase") -#define __DISABLE_FSGSBASE__ -#endif /* __FSGSBASE__ */ -extern __inline unsigned int -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_readfsbase_u32 (void) -{ - return __builtin_ia32_rdfsbase32 (); -} - -extern __inline unsigned long long -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_readfsbase_u64 (void) -{ - return __builtin_ia32_rdfsbase64 (); -} - -extern __inline unsigned int -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_readgsbase_u32 (void) -{ - return __builtin_ia32_rdgsbase32 (); -} - -extern __inline unsigned long long -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_readgsbase_u64 (void) -{ - return __builtin_ia32_rdgsbase64 (); -} - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_writefsbase_u32 (unsigned int __B) -{ - __builtin_ia32_wrfsbase32 (__B); -} - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_writefsbase_u64 (unsigned long long __B) -{ - __builtin_ia32_wrfsbase64 (__B); -} - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_writegsbase_u32 (unsigned int __B) -{ - __builtin_ia32_wrgsbase32 (__B); -} - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_writegsbase_u64 (unsigned long long __B) -{ - __builtin_ia32_wrgsbase64 (__B); -} -#ifdef __DISABLE_FSGSBASE__ -#undef __DISABLE_FSGSBASE__ -#pragma GCC pop_options -#endif /* __DISABLE_FSGSBASE__ */ - -#ifndef __RDRND__ -#pragma GCC push_options -#pragma GCC target("rdrnd") -#define __DISABLE_RDRND__ -#endif /* __RDRND__ */ -extern __inline int -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_rdrand64_step (unsigned long long *__P) -{ - return __builtin_ia32_rdrand64_step (__P); -} -#ifdef __DISABLE_RDRND__ -#undef __DISABLE_RDRND__ -#pragma GCC pop_options -#endif /* __DISABLE_RDRND__ */ - -#endif /* __x86_64__ */ - -#ifndef __PTWRITE__ -#pragma GCC push_options -#pragma GCC target("ptwrite") -#define __DISABLE_PTWRITE__ -#endif - -#ifdef __x86_64__ -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_ptwrite64 (unsigned long long __B) -{ - __builtin_ia32_ptwrite64 (__B); -} -#endif /* __x86_64__ */ - -extern __inline void -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_ptwrite32 (unsigned __B) -{ - __builtin_ia32_ptwrite32 (__B); -} -#ifdef __DISABLE_PTWRITE__ -#undef __DISABLE_PTWRITE__ -#pragma GCC pop_options -#endif /* __DISABLE_PTWRITE__ */ - #endif /* _IMMINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c index 15b5c3d..668208c 100644 --- a/gcc/config/i386/intelmic-mkoffload.c +++ b/gcc/config/i386/intelmic-mkoffload.c @@ -231,7 +231,7 @@ compile_for_target (struct obstack *argv_obstack) unsetenv ("LIBRARY_PATH"); unsetenv ("LD_RUN_PATH"); - fork_execute (argv[0], argv, false); + fork_execute (argv[0], argv, false, NULL); obstack_free (argv_obstack, NULL); /* Restore environment variables. 
*/ @@ -455,7 +455,7 @@ generate_host_descr_file (const char *host_compiler) obstack_ptr_grow (&argv_obstack, NULL); char **argv = XOBFINISH (&argv_obstack, char **); - fork_execute (argv[0], argv, false); + fork_execute (argv[0], argv, false, NULL); obstack_free (&argv_obstack, NULL); return obj_filename; @@ -538,7 +538,7 @@ prepare_target_image (const char *target_compiler, int argc, char **argv) obstack_ptr_grow (&argv_obstack, rename_section_opt); obstack_ptr_grow (&argv_obstack, NULL); char **new_argv = XOBFINISH (&argv_obstack, char **); - fork_execute (new_argv[0], new_argv, false); + fork_execute (new_argv[0], new_argv, false, NULL); obstack_free (&argv_obstack, NULL); /* Objcopy has created symbols, containing the input file name with @@ -580,7 +580,7 @@ prepare_target_image (const char *target_compiler, int argc, char **argv) obstack_ptr_grow (&argv_obstack, opt_for_objcopy[2]); obstack_ptr_grow (&argv_obstack, NULL); new_argv = XOBFINISH (&argv_obstack, char **); - fork_execute (new_argv[0], new_argv, false); + fork_execute (new_argv[0], new_argv, false, NULL); obstack_free (&argv_obstack, NULL); return target_so_filename; @@ -672,7 +672,7 @@ main (int argc, char **argv) obstack_ptr_grow (&argv_obstack, out_obj_filename); obstack_ptr_grow (&argv_obstack, NULL); char **new_argv = XOBFINISH (&argv_obstack, char **); - fork_execute (new_argv[0], new_argv, false); + fork_execute (new_argv[0], new_argv, false, NULL); obstack_free (&argv_obstack, NULL); /* Run objcopy on the resultant object file to localize generated symbols @@ -688,7 +688,7 @@ main (int argc, char **argv) obstack_ptr_grow (&argv_obstack, out_obj_filename); obstack_ptr_grow (&argv_obstack, NULL); new_argv = XOBFINISH (&argv_obstack, char **); - fork_execute (new_argv[0], new_argv, false); + fork_execute (new_argv[0], new_argv, false, NULL); obstack_free (&argv_obstack, NULL); return 0; diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h index d7c3acb..0b5c8bb 100644 --- a/gcc/config/i386/lwpintrin.h +++ b/gcc/config/i386/lwpintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _X86INTRIN_H_INCLUDED -# error "Never use <lwpintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <lwpintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _LWPINTRIN_H_INCLUDED diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h index 1863a58..6d00e9f 100644 --- a/gcc/config/i386/lzcntintrin.h +++ b/gcc/config/i386/lzcntintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _X86INTRIN_H_INCLUDED && !defined _IMMINTRIN_H_INCLUDED -# error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <lzcntintrin.h> directly; include <x86gprintrin.h> instead." #endif diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h index 408e57c..0d0aa93 100644 --- a/gcc/config/i386/mingw-w64.h +++ b/gcc/config/i386/mingw-w64.h @@ -98,3 +98,9 @@ along with GCC; see the file COPYING3. If not see %{shared|mdll: " SUB_LINK_ENTRY " --enable-auto-image-base} \ " LINK_SPEC_LARGE_ADDR_AWARE "\ %(shared_libgcc_undefs)" + +/* Enable sincos optimization, overriding cygming.h. sincos, sincosf + and sincosl functions are available on mingw-w64, but not on the + original mingw32. 
*/ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h index 77de7ca..dff42fd 100644 --- a/gcc/config/i386/mmintrin.h +++ b/gcc/config/i386/mmintrin.h @@ -42,9 +42,15 @@ /* The Intel API is flexible enough that we must allow aliasing with other vector types, and their scalar components. */ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); +typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__)); +typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__)); /* Unaligned version of the same type */ typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1))); +typedef int __m32_u __attribute__ ((__vector_size__ (4), \ + __may_alias__, __aligned__ (1))); +typedef short __m16_u __attribute__ ((__vector_size__ (2), \ + __may_alias__, __aligned__ (1))); /* Internal data types for implementing the intrinsics. */ typedef int __v2si __attribute__ ((__vector_size__ (8))); diff --git a/gcc/config/i386/movdirintrin.h b/gcc/config/i386/movdirintrin.h index e7f374a..b2f8406 100644 --- a/gcc/config/i386/movdirintrin.h +++ b/gcc/config/i386/movdirintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <movdirintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <movdirintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _MOVDIRINTRIN_H_INCLUDED diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h index d2a3261..31c493a 100644 --- a/gcc/config/i386/pconfigintrin.h +++ b/gcc/config/i386/pconfigintrin.h @@ -1,5 +1,28 @@ -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <pconfigintrin.h> directly; include <immintrin.h> instead." +/* Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <pconfigintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _PCONFIGINTRIN_H_INCLUDED diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h index 6840914..0d2dd51 100644 --- a/gcc/config/i386/pkuintrin.h +++ b/gcc/config/i386/pkuintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <pkuintrin.h> directly; include <immintrin.h> instead." 
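The header-guard changes in these intrinsic headers (pkuintrin.h, rdseedintrin.h, movdirintrin.h and the rest) reflect the new <x86gprintrin.h> umbrella header added later in this patch: the general-purpose-register intrinsic headers now refuse direct inclusion and expect <x86gprintrin.h>, which <immintrin.h> and <x86intrin.h> in turn include. A minimal sketch of user code that keeps compiling across this reorganisation (the function name is an illustration; RDRAND hardware is assumed at run time):

#include <x86gprintrin.h>  /* or <immintrin.h>/<x86intrin.h>, which now include it */

/* Retry RDRAND until the hardware reports success; _rdrand32_step is
   declared in x86gprintrin.h after this change, under its own
   target("rdrnd") pragma, so no extra -m flag is needed to compile it.  */
unsigned int
random_u32 (void)
{
  unsigned int r;
  while (!_rdrand32_step (&r))
    ;
  return r;
}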
+#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <pkuintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _PKUINTRIN_H_INCLUDED diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b03f9cd..be57cda 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1081,6 +1081,19 @@ (ior (match_operand 0 "register_operand") (match_operand 0 "vector_memory_operand"))) +(define_predicate "bcst_mem_operand" + (and (match_code "vec_duplicate") + (and (match_test "TARGET_AVX512F") + (ior (match_test "TARGET_AVX512VL") + (match_test "GET_MODE_SIZE (GET_MODE (op)) == 64"))) + (match_test "VALID_BCST_MODE_P (GET_MODE_INNER (GET_MODE (op)))") + (match_test "memory_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))"))) + +; Return true when OP is bcst_mem_operand or vector_memory_operand. +(define_predicate "bcst_vector_operand" + (ior (match_operand 0 "vector_operand") + (match_operand 0 "bcst_mem_operand"))) + ;; Return true when OP is either nonimmediate operand, or any ;; CONST_VECTOR. (define_predicate "nonimmediate_or_const_vector_operand" diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h index efc7cea..168053a 100644 --- a/gcc/config/i386/rdseedintrin.h +++ b/gcc/config/i386/rdseedintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <rdseedintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <rdseedintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _RDSEEDINTRIN_H_INCLUDED diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h index 463a989..436e517 100644 --- a/gcc/config/i386/rtmintrin.h +++ b/gcc/config/i386/rtmintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _IMMINTRIN_H_INCLUDED -# error "Never use <rtmintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <rtmintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _RTMINTRIN_H_INCLUDED diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h index 0c35b9e..95f26d6 100644 --- a/gcc/config/i386/serializeintrin.h +++ b/gcc/config/i386/serializeintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <serializeintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <serializeintrin.h> directly; include <x86gprintrin.h> instead." 
#endif #ifndef _SERIALIZE_H_INCLUDED diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 934b60a..52635f6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1756,8 +1756,8 @@ (define_insn "*<plusminus_insn><mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (plusminus:VF - (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v") - (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))] + (match_operand:VF 1 "<bcst_round_nimm_predicate>" "<comm>0,v") + (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))] "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ @@ -1765,35 +1765,7 @@ v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "sseadd") - (set_attr "prefix" "<mask_prefix3>") - (set_attr "mode" "<MODE>")]) - -(define_insn "*sub<mode>3<mask_name>_bcst" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (minus:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))] - "TARGET_AVX512F - && ix86_binary_operator_ok (MINUS, <MODE>mode, operands) - && <mask_mode512bit_condition>" - "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}" - [(set_attr "prefix" "evex") - (set_attr "type" "sseadd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*add<mode>3<mask_name>_bcst" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (plus:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VF_AVX512 2 "register_operand" "v")))] - "TARGET_AVX512F - && ix86_binary_operator_ok (PLUS, <MODE>mode, operands) - && <mask_mode512bit_condition>" - "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}" - [(set_attr "prefix" "evex") - (set_attr "type" "sseadd") + (set_attr "prefix" "<bcst_mask_prefix3>") (set_attr "mode" "<MODE>")]) ;; Standard scalar operation patterns which preserve the rest of the @@ -1846,32 +1818,19 @@ (define_insn "*mul<mode>3<mask_name><round_name>" [(set (match_operand:VF 0 "register_operand" "=x,v") (mult:VF - (match_operand:VF 1 "<round_nimm_predicate>" "%0,v") - (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))] - "TARGET_SSE - && !(MEM_P (operands[1]) && MEM_P (operands[2])) + (match_operand:VF 1 "<bcst_round_nimm_predicate>" "%0,v") + (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))] + "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ mul<ssemodesuffix>\t{%2, %0|%0, %2} vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssemul") - (set_attr "prefix" "<mask_prefix3>") + (set_attr "prefix" "<bcst_mask_prefix3>") (set_attr "btver2_decode" "direct,double") (set_attr "mode" "<MODE>")]) -(define_insn "*mul<mode>3<mask_name>_bcst" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (mult:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VF_AVX512 2 "register_operand" "v")))] - "TARGET_AVX512F && 
<mask_mode512bit_condition>" - "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}" - [(set_attr "prefix" "evex") - (set_attr "type" "ssemul") - (set_attr "mode" "<MODE>")]) - ;; Standard scalar operation patterns which preserve the rest of the ;; vector for combiner. (define_insn "*<sse>_vm<multdiv_mnemonic><mode>3" @@ -1943,26 +1902,14 @@ [(set (match_operand:VF 0 "register_operand" "=x,v") (div:VF (match_operand:VF 1 "register_operand" "0,v") - (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))] + (match_operand:VF 2 "<bcst_round_nimm_predicate>" "xBm,<bcst_round_constraint>")))] "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>" "@ div<ssemodesuffix>\t{%2, %0|%0, %2} vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}" [(set_attr "isa" "noavx,avx") (set_attr "type" "ssediv") - (set_attr "prefix" "<mask_prefix3>") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<avx512>_div<mode>3<mask_name>_bcst" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (div:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))] - "TARGET_AVX512F && <mask_mode512bit_condition>" - "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}" - [(set_attr "prefix" "evex") - (set_attr "type" "ssediv") + (set_attr "prefix" "<bcst_mask_prefix3>") (set_attr "mode" "<MODE>")]) (define_insn "<sse>_rcp<mode>2" @@ -2861,30 +2808,30 @@ DONE; }) -(define_insn "<mask_codefor>reducep<mode><mask_name>" +(define_insn "<mask_codefor>reducep<mode><mask_name><round_saeonly_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v") (unspec:VF_AVX512VL - [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm") + [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_REDUCE))] "TARGET_AVX512DQ" - "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" + "vreduce<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) -(define_insn "reduces<mode><mask_scalar_name>" +(define_insn "reduces<mode><mask_scalar_name><round_saeonly_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "nonimmediate_operand" "vm") + (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_REDUCE) (match_dup 1) (const_int 1)))] "TARGET_AVX512DQ" - "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}" + "vreduce<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}" [(set_attr "type" "sse") (set_attr "prefix" "evex") (set_attr "mode" "<MODE>")]) @@ -4055,9 +4002,9 @@ (define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") - 
(match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] + (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") + (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4066,46 +4013,6 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (fma:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "%0") - (match_operand:VF_AVX512 2 "register_operand" "v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")) - (match_operand:VF_AVX512 2 "register_operand" "0,v") - (match_operand:VF_AVX512 3 "register_operand" "v,0")))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} - vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "0,v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) - (match_operand:VF_AVX512 3 "register_operand" "v,0")))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} - vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - (define_insn "<avx512>_fmadd_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -4171,10 +4078,10 @@ (define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>" [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v") - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v") + (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] + (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" 
"v,<bcst_round_constraint>,0"))))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4183,49 +4090,6 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (fma:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "%0") - (match_operand:VF_AVX512 2 "register_operand" "v") - (neg:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")) - (match_operand:VF_AVX512 2 "register_operand" "0,v") - (neg:VF_AVX512 - (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} - vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "0,v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) - (neg:VF_AVX512 - (match_operand:VF_AVX512 3 "nonimmediate_operand" "v,0"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} - vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - (define_insn "<avx512>_fmsub_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -4294,9 +4158,9 @@ [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))] + (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) + (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") + (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0")))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4305,49 +4169,6 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - 
(fma:VF_AVX512 - (neg:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "%0")) - (match_operand:VF_AVX512 2 "register_operand" "v") - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (neg:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))) - (match_operand:VF_AVX512 2 "register_operand" "0,v") - (match_operand:VF_AVX512 3 "register_operand" "v,0")))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} - vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (neg:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "0,v")) - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) - (match_operand:VF_AVX512 3 "register_operand" "v,0")))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} - vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - (define_insn "<avx512>_fnmadd_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -4417,10 +4238,10 @@ [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v") (fma:VF_SF_AVX512VL (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")) - (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>") + (match_operand:VF_SF_AVX512VL 1 "<bcst_round_nimm_predicate>" "%0,0,v")) + (match_operand:VF_SF_AVX512VL 2 "<bcst_round_nimm_predicate>" "<bcst_round_constraint>,v,<bcst_round_constraint>") (neg:VF_SF_AVX512VL - (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))] + (match_operand:VF_SF_AVX512VL 3 "<bcst_round_nimm_predicate>" "v,<bcst_round_constraint>,0"))))] "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>" "@ vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>} @@ -4429,52 +4250,6 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v") - (fma:VF_AVX512 - (neg:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "%0")) - (match_operand:VF_AVX512 2 "register_operand" "v") - (neg:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}" - [(set_attr "type" 
"ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (neg:VF_AVX512 - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))) - (match_operand:VF_AVX512 2 "register_operand" "0,v") - (neg:VF_AVX512 - (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>} - vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3" - [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v") - (fma:VF_AVX512 - (neg:VF_AVX512 - (match_operand:VF_AVX512 1 "register_operand" "0,v")) - (vec_duplicate:VF_AVX512 - (match_operand:<ssescalarmode> 2 "memory_operand" "m,m")) - (neg:VF_AVX512 - (match_operand:VF_AVX512 3 "register_operand" "v,0"))))] - "TARGET_AVX512F && <sd_mask_mode512bit_condition>" - "@ - vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>} - vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - (define_insn "<avx512>_fnmsub_<mode>_mask<round_name>" [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v") (vec_merge:VF_AVX512VL @@ -6374,7 +6149,7 @@ (set_attr "prefix" "evex") (set_attr "mode" "TI")]) -(define_insn "sse2_cvtsd2ss<round_name>" +(define_insn "sse2_cvtsd2ss<mask_name><round_name>" [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") (vec_merge:V4SF (vec_duplicate:V4SF @@ -6386,7 +6161,7 @@ "@ cvtsd2ss\t{%2, %0|%0, %2} cvtsd2ss\t{%2, %0|%0, %q2} - vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}" + vcvtsd2ss\t{<round_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %q2<round_mask_op3>}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecvt") (set_attr "athlon_decode" "vector,double,*") @@ -6417,7 +6192,7 @@ (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "SF")]) -(define_insn "sse2_cvtss2sd<round_saeonly_name>" +(define_insn "sse2_cvtss2sd<mask_name><round_saeonly_name>" [(set (match_operand:V2DF 0 "register_operand" "=x,x,v") (vec_merge:V2DF (float_extend:V2DF @@ -6430,7 +6205,7 @@ "@ cvtss2sd\t{%2, %0|%0, %2} cvtss2sd\t{%2, %0|%0, %k2} - vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}" + vcvtss2sd\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|<mask_operand3>%0, %1, %k2<round_saeonly_mask_op3>}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecvt") (set_attr "amdfam10_decode" "vector,double,*") @@ -11563,8 +11338,8 @@ (define_insn "*<plusminus_insn><mode>3" [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v") (plusminus:VI_AVX2 - (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v") - (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))] + (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v") + (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))] "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" "@ p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2} @@ -11572,31 +11347,7 @@ [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "prefix_data16" "1,*") - (set_attr "prefix" 
"orig,vex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "*sub<mode>3_bcst" - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") - (minus:VI48_AVX512VL - (match_operand:VI48_AVX512VL 1 "register_operand" "v") - (vec_duplicate:VI48_AVX512VL - (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))] - "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)" - "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "*add<mode>3_bcst" - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") - (plus:VI48_AVX512VL - (vec_duplicate:VI48_AVX512VL - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VI48_AVX512VL 2 "register_operand" "v")))] - "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)" - "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "<sseinsnmode>")]) (define_insn "*<plusminus_insn><mode>3_mask" @@ -12110,24 +11861,13 @@ (set_attr "mode" "TI")]) (define_insn "avx512dq_mul<mode>3<mask_name>" - [(set (match_operand:VI8 0 "register_operand" "=v") - (mult:VI8 - (match_operand:VI8 1 "register_operand" "v") - (match_operand:VI8 2 "nonimmediate_operand" "vm")))] - "TARGET_AVX512DQ && <mask_mode512bit_condition>" - "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" - [(set_attr "type" "sseimul") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - -(define_insn "*avx512dq_mul<mode>3<mask_name>_bcst" [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") (mult:VI8_AVX512VL - (vec_duplicate:VI8_AVX512VL - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VI8_AVX512VL 2 "register_operand" "v")))] - "TARGET_AVX512DQ" - "vpmullq\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}" + (match_operand:VI8_AVX512VL 1 "bcst_vector_operand" "%v") + (match_operand:VI8_AVX512VL 2 "bcst_vector_operand" "vmBr")))] + "TARGET_AVX512DQ && <mask_mode512bit_condition> + && ix86_binary_operator_ok (MULT, <MODE>mode, operands)" + "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}" [(set_attr "type" "sseimul") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -12157,10 +11897,10 @@ (define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>" [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v") (mult:VI4_AVX512F - (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v") - (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))] - "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2])) - && <mask_mode512bit_condition>" + (match_operand:VI4_AVX512F 1 "bcst_vector_operand" "%0,0,v") + (match_operand:VI4_AVX512F 2 "bcst_vector_operand" "YrBm,*xBm,vmBr")))] + "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) + && <mask_mode512bit_condition>" "@ pmulld\t{%2, %0|%0, %2} pmulld\t{%2, %0|%0, %2} @@ -12168,22 +11908,10 @@ [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sseimul") (set_attr "prefix_extra" "1") - (set_attr "prefix" "<mask_prefix4>") + (set_attr "prefix" "<bcst_mask_prefix4>") (set_attr "btver2_decode" "vector,vector,vector") (set_attr "mode" "<sseinsnmode>")]) -(define_insn "*avx512f_mul<mode>3<mask_name>_bcst" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (mult:VI4_AVX512VL - 
(vec_duplicate:VI4_AVX512VL - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VI4_AVX512VL 2 "register_operand" "v")))] - "TARGET_AVX512F" - "vpmulld\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}" - [(set_attr "type" "sseimul") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - (define_expand "mul<mode>3" [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand") (mult:VI8_AVX2_AVX512F @@ -13210,7 +12938,7 @@ [(set (match_operand:VI 0 "register_operand" "=x,x,v") (and:VI (not:VI (match_operand:VI 1 "register_operand" "0,x,v")) - (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))] + (match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))] "TARGET_SSE" { char buf[64]; @@ -13309,19 +13037,6 @@ ] (const_string "<sseinsnmode>")))]) -(define_insn "*andnot<mode>3_bcst" - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") - (and:VI48_AVX512VL - (not:VI48_AVX512VL - (match_operand:VI48_AVX512VL 1 "register_operand" "v")) - (vec_duplicate:VI48_AVX512VL - (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))] - "TARGET_AVX512F" - "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}" - [(set_attr "type" "sselog") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - (define_insn "*andnot<mode>3_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (vec_merge:VI48_AVX512VL @@ -13351,10 +13066,10 @@ (define_insn "<mask_codefor><code><mode>3<mask_name>" [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v") (any_logic:VI48_AVX_AVX512F - (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v") - (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))] + (match_operand:VI48_AVX_AVX512F 1 "bcst_vector_operand" "%0,x,v") + (match_operand:VI48_AVX_AVX512F 2 "bcst_vector_operand" "xBm,xm,vmBr")))] "TARGET_SSE && <mask_mode512bit_condition> - && !(MEM_P (operands[1]) && MEM_P (operands[2]))" + && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" { char buf[64]; const char *ops; @@ -13540,18 +13255,6 @@ ] (const_string "<sseinsnmode>")))]) -(define_insn "*<code><mode>3_bcst" - [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") - (any_logic:VI48_AVX512VL - (vec_duplicate:VI48_AVX512VL - (match_operand:<ssescalarmode> 1 "memory_operand" "m")) - (match_operand:VI48_AVX512VL 2 "register_operand" "v")))] - "TARGET_AVX512F && <mask_avx512vl_condition>" - "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}" - [(set_attr "type" "sseiadd") - (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) - (define_mode_iterator VI1248_AVX512VLBW [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V16QI "TARGET_AVX512VL && TARGET_AVX512BW") @@ -19092,7 +18795,7 @@ (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) -(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>" +(define_insn "avx512er_vmrcp28<mode><mask_name><round_saeonly_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -19101,7 +18804,7 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" - "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}" + "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_opernad3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}" [(set_attr "length_immediate" "1") (set_attr "prefix" "evex") (set_attr "type" "sse") @@ 
-19118,7 +18821,7 @@ (set_attr "type" "sse") (set_attr "mode" "<MODE>")]) -(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>" +(define_insn "avx512er_vmrsqrt28<mode><mask_name><round_saeonly_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 @@ -19127,7 +18830,7 @@ (match_operand:VF_128 2 "register_operand" "v") (const_int 1)))] "TARGET_AVX512ER" - "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}" + "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%1, %2, %0<mask_operand3>|<mask_operand3>%0, %2, %<iptr>1<round_saeonly_mask_op3>}" [(set_attr "length_immediate" "1") (set_attr "type" "sse") (set_attr "prefix" "evex") diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 58ea9dc..e037a96 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -60,7 +60,9 @@ (define_subst_attr "mask_prefix" "mask" "vex" "evex") (define_subst_attr "mask_prefix2" "mask" "maybe_vex" "evex") (define_subst_attr "mask_prefix3" "mask" "orig,vex" "evex,evex") +(define_subst_attr "bcst_mask_prefix3" "mask" "orig,maybe_evex" "evex,evex") (define_subst_attr "mask_prefix4" "mask" "orig,orig,vex" "evex,evex,evex") +(define_subst_attr "bcst_mask_prefix4" "mask" "orig,orig,maybe_evex" "evex,evex,evex") (define_subst_attr "mask_expand_op3" "mask" "3" "5") (define_subst "mask" @@ -130,9 +132,11 @@ (define_subst_attr "round_mask_op4" "round" "" "<round_mask_operand4>") (define_subst_attr "round_sd_mask_op4" "round" "" "<round_sd_mask_operand4>") (define_subst_attr "round_constraint" "round" "vm" "v") +(define_subst_attr "bcst_round_constraint" "round" "vmBr" "v") (define_subst_attr "round_constraint2" "round" "m" "v") (define_subst_attr "round_constraint3" "round" "rm" "r") (define_subst_attr "round_nimm_predicate" "round" "vector_operand" "register_operand") +(define_subst_attr "bcst_round_nimm_predicate" "round" "bcst_vector_operand" "register_operand") (define_subst_attr "round_nimm_scalar_predicate" "round" "nonimmediate_operand" "register_operand") (define_subst_attr "round_prefix" "round" "vex" "evex") (define_subst_attr "round_mode512bit_condition" "round" "1" "(<MODE>mode == V16SFmode diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 index 5134e08..e5fb061 100644 --- a/gcc/config/i386/t-i386 +++ b/gcc/config/i386/t-i386 @@ -17,7 +17,8 @@ # <http://www.gnu.org/licenses/>. OPTIONS_H_EXTRA += $(srcdir)/config/i386/stringop.def -TM_H += $(srcdir)/config/i386/x86-tune.def +TM_H += $(srcdir)/config/i386/x86-tune.def \ + $(srcdir)/common/config/i386/i386-cpuinfo.h PASSES_EXTRA += $(srcdir)/config/i386/i386-passes.def i386-c.o: $(srcdir)/config/i386/i386-c.c diff --git a/gcc/config/i386/t-rtems b/gcc/config/i386/t-rtems index 7626970..5f078c6 100644 --- a/gcc/config/i386/t-rtems +++ b/gcc/config/i386/t-rtems @@ -17,10 +17,10 @@ # <http://www.gnu.org/licenses/>. 
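The sse.md changes above retire the separate *_bcst patterns and instead let the main add/sub/mul/div, FMA and vector-logic patterns accept an embedded-broadcast memory operand through the new bcst_vector_operand predicate and the "Br" constraint alternative. A hedged illustration of the kind of source this targets (the function name is an assumption; compiled with -O2 -mavx512f, the scalar load is a candidate for being encoded as a {1to16} broadcast operand of vpaddd rather than going through a separate vpbroadcastd):

#include <immintrin.h>

/* Add the same memory-resident scalar to every element of a 512-bit
   vector; the set1 broadcast can be folded into the arithmetic
   instruction as an AVX-512 embedded broadcast.  */
__m512i
add_scalar_to_all (__m512i x, const int *p)
{
  return _mm512_add_epi32 (x, _mm512_set1_epi32 (*p));
}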
# -MULTILIB_OPTIONS = mtune=i486/mtune=pentium/mtune=pentiumpro msoft-float +MULTILIB_OPTIONS = march=i486/march=pentium/march=pentiumpro msoft-float MULTILIB_DIRNAMES= m486 mpentium mpentiumpro soft-float MULTILIB_MATCHES = msoft-float=mno-80387 -MULTILIB_MATCHES += mtune?pentium=mtune?k6 mtune?pentiumpro=mtune?athlon +MULTILIB_MATCHES += march?pentium=march?k6 march?pentiumpro=march?athlon MULTILIB_EXCEPTIONS = \ -mtune=pentium/*msoft-float* \ -mtune=pentiumpro/*msoft-float* +march=pentium/*msoft-float* \ +march=pentiumpro/*msoft-float* diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h index c8a9d77..e03bf91 100644 --- a/gcc/config/i386/tbmintrin.h +++ b/gcc/config/i386/tbmintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _X86INTRIN_H_INCLUDED -# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <tbmintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _TBMINTRIN_H_INCLUDED diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h index 08b76a9..eab36d0 100644 --- a/gcc/config/i386/tsxldtrkintrin.h +++ b/gcc/config/i386/tsxldtrkintrin.h @@ -1,5 +1,28 @@ -#if !defined _IMMINTRIN_H_INCLUDED -#error "Never use <tsxldtrkintrin.h> directly; include <immintrin.h> instead." +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <tsxldtrkintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _TSXLDTRKINTRIN_H_INCLUDED diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h new file mode 100644 index 0000000..991f642 --- /dev/null +++ b/gcc/config/i386/uintrintrin.h @@ -0,0 +1,87 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <uintrintrin.h> directly; include <x86gprintrin.h> instead." +#endif + +#ifndef _UINTRNTRIN_H_INCLUDED +#define _UINTRNTRIN_H_INCLUDED + +#ifdef __x86_64__ + +#ifndef __UINTR__ +#pragma GCC push_options +#pragma GCC target ("uintr") +#define __DISABLE_UINTR__ +#endif /* __UINTR__ */ + +struct __uintr_frame +{ + /* The position of the most significant bit set in user-interrupt + request register. */ + unsigned long long uirrv; + /* RIP of the interrupted user process. */ + unsigned long long rip; + /* RFLAGS of the interrupted user process. */ + unsigned long long rflags; + /* RSP of the interrupted user process. */ + unsigned long long rsp; +}; + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_clui (void) +{ + __builtin_ia32_clui (); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_stui (void) +{ + __builtin_ia32_stui (); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_senduipi (unsigned long long __R) +{ + __builtin_ia32_senduipi (__R); +} + +extern __inline unsigned char +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_testui (void) +{ + return __builtin_ia32_testui (); +} + +#ifdef __DISABLE_UINTR__ +#undef __DISABLE_UINTR__ +#pragma GCC pop_options +#endif /* __DISABLE_UINTR__ */ + +#endif + +#endif /* _UINTRNTRIN_H_INCLUDED. */ diff --git a/gcc/config/i386/waitpkgintrin.h b/gcc/config/i386/waitpkgintrin.h index 5dbcde3..5046c98 100644 --- a/gcc/config/i386/waitpkgintrin.h +++ b/gcc/config/i386/waitpkgintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <waitpkgintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <waitpkgintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _WAITPKG_H_INCLUDED diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h index 5393698..7089e61 100644 --- a/gcc/config/i386/wbnoinvdintrin.h +++ b/gcc/config/i386/wbnoinvdintrin.h @@ -1,5 +1,28 @@ -#ifndef _IMMINTRIN_H_INCLUDED -#error "Never use <wbnoinvdintrin.h> directly; include <immintrin.h> instead." +/* Copyright (C) 2018-2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <wbnoinvdintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _WBNOINVDINTRIN_H_INCLUDED diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h new file mode 100644 index 0000000..ffe07e4 --- /dev/null +++ b/gcc/config/i386/x86gprintrin.h @@ -0,0 +1,256 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +#define _X86GPRINTRIN_H_INCLUDED + +#include <ia32intrin.h> + +#ifndef __iamcu__ + +#include <stddef.h> + +#include <adxintrin.h> + +#include <bmiintrin.h> + +#include <bmi2intrin.h> + +#include <cetintrin.h> + +#include <cldemoteintrin.h> + +#include <clflushoptintrin.h> + +#include <clwbintrin.h> + +#include <clzerointrin.h> + +#include <enqcmdintrin.h> + +#include <fxsrintrin.h> + +#include <lzcntintrin.h> + +#include <lwpintrin.h> + +#include <movdirintrin.h> + +#include <mwaitxintrin.h> + +#include <pconfigintrin.h> + +#include <popcntintrin.h> + +#include <pkuintrin.h> + +#include <rdseedintrin.h> + +#include <rtmintrin.h> + +#include <serializeintrin.h> + +#include <sgxintrin.h> + +#include <tbmintrin.h> + +#include <tsxldtrkintrin.h> + +#include <uintrintrin.h> + +#include <waitpkgintrin.h> + +#include <wbnoinvdintrin.h> + +#include <xsaveintrin.h> + +#include <xsavecintrin.h> + +#include <xsaveoptintrin.h> + +#include <xsavesintrin.h> + +#include <xtestintrin.h> + +#include <hresetintrin.h> + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_wbinvd (void) +{ + __builtin_ia32_wbinvd (); +} + +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ +extern __inline int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_rdrand16_step (unsigned short *__P) +{ + return __builtin_ia32_rdrand16_step (__P); +} + +extern __inline int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_rdrand32_step (unsigned int *__P) +{ + return __builtin_ia32_rdrand32_step (__P); +} +#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ + +#ifndef __RDPID__ +#pragma GCC push_options +#pragma GCC target("rdpid") +#define __DISABLE_RDPID__ +#endif /* __RDPID__ */ +extern __inline unsigned int 
+__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_rdpid_u32 (void) +{ + return __builtin_ia32_rdpid (); +} +#ifdef __DISABLE_RDPID__ +#undef __DISABLE_RDPID__ +#pragma GCC pop_options +#endif /* __DISABLE_RDPID__ */ + +#ifdef __x86_64__ + +#ifndef __FSGSBASE__ +#pragma GCC push_options +#pragma GCC target("fsgsbase") +#define __DISABLE_FSGSBASE__ +#endif /* __FSGSBASE__ */ +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_readfsbase_u32 (void) +{ + return __builtin_ia32_rdfsbase32 (); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_readfsbase_u64 (void) +{ + return __builtin_ia32_rdfsbase64 (); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_readgsbase_u32 (void) +{ + return __builtin_ia32_rdgsbase32 (); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_readgsbase_u64 (void) +{ + return __builtin_ia32_rdgsbase64 (); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_writefsbase_u32 (unsigned int __B) +{ + __builtin_ia32_wrfsbase32 (__B); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_writefsbase_u64 (unsigned long long __B) +{ + __builtin_ia32_wrfsbase64 (__B); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_writegsbase_u32 (unsigned int __B) +{ + __builtin_ia32_wrgsbase32 (__B); +} + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_writegsbase_u64 (unsigned long long __B) +{ + __builtin_ia32_wrgsbase64 (__B); +} +#ifdef __DISABLE_FSGSBASE__ +#undef __DISABLE_FSGSBASE__ +#pragma GCC pop_options +#endif /* __DISABLE_FSGSBASE__ */ + +#ifndef __RDRND__ +#pragma GCC push_options +#pragma GCC target("rdrnd") +#define __DISABLE_RDRND__ +#endif /* __RDRND__ */ +extern __inline int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_rdrand64_step (unsigned long long *__P) +{ + return __builtin_ia32_rdrand64_step (__P); +} +#ifdef __DISABLE_RDRND__ +#undef __DISABLE_RDRND__ +#pragma GCC pop_options +#endif /* __DISABLE_RDRND__ */ + +#endif /* __x86_64__ */ + +#ifndef __PTWRITE__ +#pragma GCC push_options +#pragma GCC target("ptwrite") +#define __DISABLE_PTWRITE__ +#endif + +#ifdef __x86_64__ +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_ptwrite64 (unsigned long long __B) +{ + __builtin_ia32_ptwrite64 (__B); +} +#endif /* __x86_64__ */ + +extern __inline void +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_ptwrite32 (unsigned __B) +{ + __builtin_ia32_ptwrite32 (__B); +} +#ifdef __DISABLE_PTWRITE__ +#undef __DISABLE_PTWRITE__ +#pragma GCC pop_options +#endif /* __DISABLE_PTWRITE__ */ + +#endif /* __iamcu__ */ + +#endif /* _X86GPRINTRIN_H_INCLUDED. 
*/ diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h index 59fdceb..bc6cb40 100644 --- a/gcc/config/i386/x86intrin.h +++ b/gcc/config/i386/x86intrin.h @@ -24,7 +24,7 @@ #ifndef _X86INTRIN_H_INCLUDED #define _X86INTRIN_H_INCLUDED -#include <ia32intrin.h> +#include <x86gprintrin.h> #ifndef __iamcu__ @@ -37,16 +37,6 @@ #include <xopintrin.h> -#include <lwpintrin.h> - -#include <tbmintrin.h> - -#include <popcntintrin.h> - -#include <mwaitxintrin.h> - -#include <clzerointrin.h> - #endif /* __iamcu__ */ #endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/xsavecintrin.h b/gcc/config/i386/xsavecintrin.h index 039e215..06c9f36 100644 --- a/gcc/config/i386/xsavecintrin.h +++ b/gcc/config/i386/xsavecintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <xsavecintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <xsavecintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _XSAVECINTRIN_H_INCLUDED diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h index 9f0b8bb..f9cac0d 100644 --- a/gcc/config/i386/xsaveintrin.h +++ b/gcc/config/i386/xsaveintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <xsaveintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <xsaveintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _XSAVEINTRIN_H_INCLUDED diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h index 9da3297..4f2756b 100644 --- a/gcc/config/i386/xsaveoptintrin.h +++ b/gcc/config/i386/xsaveoptintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <xsaveoptintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <xsaveoptintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _XSAVEOPTINTRIN_H_INCLUDED diff --git a/gcc/config/i386/xsavesintrin.h b/gcc/config/i386/xsavesintrin.h index 264f1c4..629a1f3 100644 --- a/gcc/config/i386/xsavesintrin.h +++ b/gcc/config/i386/xsavesintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#if !defined _IMMINTRIN_H_INCLUDED -# error "Never use <xsavesintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <xsavesintrin.h> directly; include <x86gprintrin.h> instead." #endif #ifndef _XSAVESINTRIN_H_INCLUDED diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h index cb187e4..757cc34 100644 --- a/gcc/config/i386/xtestintrin.h +++ b/gcc/config/i386/xtestintrin.h @@ -21,8 +21,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef _IMMINTRIN_H_INCLUDED -# error "Never use <xtestintrin.h> directly; include <immintrin.h> instead." +#ifndef _X86GPRINTRIN_H_INCLUDED +# error "Never use <xtestintrin.h> directly; include <x86gprintrin.h> instead." 
#endif #ifndef _XTESTINTRIN_H_INCLUDED diff --git a/gcc/config/linux-protos.h b/gcc/config/linux-protos.h index 3759187..c52778b 100644 --- a/gcc/config/linux-protos.h +++ b/gcc/config/linux-protos.h @@ -19,4 +19,4 @@ along with GCC; see the file COPYING3. If not see extern bool linux_has_ifunc_p (void); -extern bool linux_libc_has_function (enum function_class fn_class); +extern bool linux_libc_has_function (enum function_class fn_class, tree); diff --git a/gcc/config/linux.c b/gcc/config/linux.c index 9876153..83ffff4 100644 --- a/gcc/config/linux.c +++ b/gcc/config/linux.c @@ -25,7 +25,8 @@ along with GCC; see the file COPYING3. If not see #include "linux-protos.h" bool -linux_libc_has_function (enum function_class fn_class) +linux_libc_has_function (enum function_class fn_class, + tree type ATTRIBUTE_UNUSED) { if (OPTION_GLIBC || OPTION_MUSL) return true; diff --git a/gcc/config/msp430/msp430.md b/gcc/config/msp430/msp430.md index f70e61b..ad244bb 100644 --- a/gcc/config/msp430/msp430.md +++ b/gcc/config/msp430/msp430.md @@ -1346,12 +1346,12 @@ ;; instructions, so we provide a pattern to support it here. (define_insn "andneghi3" [(set (match_operand:HI 0 "register_operand" "=r") - (and:HI (neg:HI (match_operand:HI 1 "register_operand" "r")) + (and:HI (neg:HI (match_operand:HI 1 "general_operand" "rm")) (match_operand 2 "immediate_operand" "n")))] "" "* if (REGNO (operands[0]) != REGNO (operands[1])) - return \"MOV.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; + return \"MOV%X1.W\t%1, %0 { INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; else return \"INV.W\t%0 { INC.W\t%0 { AND.W\t%2, %0\"; " diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c index 4fecb2b..a3c4099 100644 --- a/gcc/config/nvptx/mkoffload.c +++ b/gcc/config/nvptx/mkoffload.c @@ -399,7 +399,8 @@ compile_native (const char *infile, const char *outfile, const char *compiler, obstack_ptr_grow (&argv_obstack, NULL); const char **new_argv = XOBFINISH (&argv_obstack, const char **); - fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true); + fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true, + ".gccnative_args"); obstack_free (&argv_obstack, NULL); } @@ -582,7 +583,8 @@ main (int argc, char **argv) unsetenv ("COMPILER_PATH"); unsetenv ("LIBRARY_PATH"); - fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true); + fork_execute (new_argv[0], CONST_CAST (char **, new_argv), true, + ".gcc_args"); obstack_free (&argv_obstack, NULL); xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL)); @@ -594,6 +596,7 @@ main (int argc, char **argv) fatal_error (input_location, "cannot open intermediate ptx file"); process (in, out); + fclose (in); } fclose (out); diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 0c590d8..1734947 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -368,6 +368,22 @@ nvptx_name_replacement (const char *name) return name; } +/* Return NULL if NAME contains no dot. Otherwise return a copy of NAME + with the dots replaced with dollar signs. */ + +static char * +nvptx_replace_dot (const char *name) +{ + if (strchr (name, '.') == NULL) + return NULL; + + char *p = xstrdup (name); + for (size_t i = 0; i < strlen (p); ++i) + if (p[i] == '.') + p[i] = '$'; + return p; +} + /* If MODE should be treated as two registers of an inner mode, return that inner mode. Otherwise return VOIDmode. 
*/ @@ -827,26 +843,12 @@ write_var_marker (FILE *file, bool is_defn, bool globalize, const char *name) fputs ("\n", file); } -/* Write a .func or .kernel declaration or definition along with - a helper comment for use by ld. S is the stream to write to, DECL - the decl for the function with name NAME. For definitions, emit - a declaration too. */ +/* Helper function for write_fn_proto. */ -static const char * -write_fn_proto (std::stringstream &s, bool is_defn, - const char *name, const_tree decl) +static void +write_fn_proto_1 (std::stringstream &s, bool is_defn, + const char *name, const_tree decl) { - if (is_defn) - /* Emit a declaration. The PTX assembler gets upset without it. */ - name = write_fn_proto (s, false, name, decl); - else - { - /* Avoid repeating the name replacement. */ - name = nvptx_name_replacement (name); - if (name[0] == '*') - name++; - } - write_fn_marker (s, is_defn, TREE_PUBLIC (decl), name); /* PTX declaration. */ @@ -929,8 +931,38 @@ write_fn_proto (std::stringstream &s, bool is_defn, s << ")"; s << (is_defn ? "\n" : ";\n"); +} - return name; +/* Write a .func or .kernel declaration or definition along with + a helper comment for use by ld. S is the stream to write to, DECL + the decl for the function with name NAME. For definitions, emit + a declaration too. */ + +static void +write_fn_proto (std::stringstream &s, bool is_defn, + const char *name, const_tree decl) +{ + const char *replacement = nvptx_name_replacement (name); + char *replaced_dots = NULL; + if (replacement != name) + name = replacement; + else + { + replaced_dots = nvptx_replace_dot (name); + if (replaced_dots) + name = replaced_dots; + } + if (name[0] == '*') + name++; + + if (is_defn) + /* Emit a declaration. The PTX assembler gets upset without it. */ + write_fn_proto_1 (s, false, name, decl); + + write_fn_proto_1 (s, is_defn, name, decl); + + if (replaced_dots) + XDELETE (replaced_dots); } /* Construct a function declaration from a call insn. This can be @@ -942,6 +974,8 @@ static void write_fn_proto_from_insn (std::stringstream &s, const char *name, rtx result, rtx pat) { + char *replaced_dots = NULL; + if (!name) { s << "\t.callprototype "; @@ -949,7 +983,15 @@ write_fn_proto_from_insn (std::stringstream &s, const char *name, } else { - name = nvptx_name_replacement (name); + const char *replacement = nvptx_name_replacement (name); + if (replacement != name) + name = replacement; + else + { + replaced_dots = nvptx_replace_dot (name); + if (replaced_dots) + name = replaced_dots; + } write_fn_marker (s, false, true, name); s << "\t.extern .func "; } @@ -958,6 +1000,8 @@ write_fn_proto_from_insn (std::stringstream &s, const char *name, write_return_mode (s, true, GET_MODE (result)); s << name; + if (replaced_dots) + XDELETE (replaced_dots); int arg_end = XVECLEN (pat, 0); for (int i = 1; i < arg_end; i++) @@ -2101,7 +2145,7 @@ nvptx_assemble_integer (rtx x, unsigned int size, int ARG_UNUSED (aligned_p)) val = INTVAL (XEXP (x, 1)); x = XEXP (x, 0); gcc_assert (GET_CODE (x) == SYMBOL_REF); - /* FALLTHROUGH */ + gcc_fallthrough (); /* FALLTHROUGH */ case SYMBOL_REF: gcc_assert (size == init_frag.size); @@ -2349,6 +2393,7 @@ const char * nvptx_output_mov_insn (rtx dst, rtx src) { machine_mode dst_mode = GET_MODE (dst); + machine_mode src_mode = GET_MODE (src); machine_mode dst_inner = (GET_CODE (dst) == SUBREG ? 
GET_MODE (XEXP (dst, 0)) : dst_mode); machine_mode src_inner = (GET_CODE (src) == SUBREG @@ -2375,7 +2420,7 @@ nvptx_output_mov_insn (rtx dst, rtx src) if (GET_MODE_SIZE (dst_inner) == GET_MODE_SIZE (src_inner)) { if (GET_MODE_BITSIZE (dst_mode) == 128 - && GET_MODE_BITSIZE (GET_MODE (src)) == 128) + && GET_MODE_BITSIZE (src_mode) == 128) { /* mov.b128 is not supported. */ if (dst_inner == V2DImode && src_inner == TImode) @@ -2388,6 +2433,10 @@ nvptx_output_mov_insn (rtx dst, rtx src) return "%.\tmov.b%T0\t%0, %1;"; } + if (GET_MODE_BITSIZE (src_inner) == 128 + && GET_MODE_BITSIZE (src_mode) == 64) + return "%.\tmov.b%T0\t%0, %1;"; + return "%.\tcvt%t0%t1\t%0, %1;"; } @@ -2458,9 +2507,20 @@ nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee) if (decl) { + char *replaced_dots = NULL; const char *name = get_fnname_from_decl (decl); - name = nvptx_name_replacement (name); + const char *replacement = nvptx_name_replacement (name); + if (replacement != name) + name = replacement; + else + { + replaced_dots = nvptx_replace_dot (name); + if (replaced_dots) + name = replaced_dots; + } assemble_name (asm_out_file, name); + if (replaced_dots) + XDELETE (replaced_dots); } else output_address (VOIDmode, callee); @@ -2598,7 +2658,7 @@ nvptx_print_operand (FILE *file, rtx x, int code) { case 'A': x = XEXP (x, 0); - /* FALLTHROUGH. */ + gcc_fallthrough (); /* FALLTHROUGH. */ case 'D': if (GET_CODE (x) == CONST) @@ -6531,6 +6591,23 @@ nvptx_set_current_function (tree fndecl) oacc_bcast_partition = 0; } +/* Implement TARGET_LIBC_HAS_FUNCTION. */ + +bool +nvptx_libc_has_function (enum function_class fn_class, tree type) +{ + if (fn_class == function_sincos) + { + if (type != NULL_TREE) + /* Currently, newlib does not support sincosl. */ + return type == float_type_node || type == double_type_node; + else + return true; + } + + return default_libc_has_function (fn_class, type); +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override @@ -6676,6 +6753,9 @@ nvptx_set_current_function (tree fndecl) #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION nvptx_libc_has_function + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h" diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index 6ebcc76..17fe157 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -29,7 +29,10 @@ #define STARTFILE_SPEC "%{mmainkernel:crt0.o}" -#define ASM_SPEC "%{misa=*:-m %*}" +/* Default needs to be in sync with default for misa in nvptx.opt. + We add a default here to work around a hard-coded sm_30 default in + nvptx-as. */ +#define ASM_SPEC "%{misa=*:-m %*; :-m sm_35}" #define TARGET_CPU_CPP_BUILTINS() \ do \ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 6178e6a..ccbcd09 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -146,6 +146,13 @@ return true; }) +;; Test for a function symbol ref operand +(define_predicate "symbol_ref_function_operand" + (match_code "symbol_ref") +{ + return SYMBOL_REF_FUNCTION_P (op); +}) + (define_attr "predicable" "false,true" (const_string "true")) @@ -241,6 +248,17 @@ } [(set_attr "subregs_ok" "true")]) +;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up. 
+(define_split + [(set (match_operand:DI 0 "nvptx_register_operand") + (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand") + (match_operand 2 "const_int_operand"))))] + "" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2))) + ] + "") + (define_insn "*mov<mode>_insn" [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m") (match_operand:SDFM 1 "general_operand" "RF,m,R"))] @@ -365,9 +383,13 @@ [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m") (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))] "" - "@ - %.\\tcvt%t0.u32\\t%0, %1; - %.\\tst%A0.u%T0\\t%0, %1;" + { + if (which_alternative == 1) + return "%.\\tst%A0.u%T0\\t%0, %1;"; + if (GET_MODE (operands[0]) == QImode) + return "%.\\tmov%t0\\t%0, %1;"; + return "%.\\tcvt%t0.u32\\t%0, %1;"; + } [(set_attr "subregs_ok" "true")]) (define_insn "truncdi<mode>2" diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 75c3d54..045e354 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -17,9 +17,11 @@ ; along with GCC; see the file COPYING3. If not see ; <http://www.gnu.org/licenses/>. -m32 -Target Report RejectNegative InverseMask(ABI64) -Generate code for a 32-bit ABI. +; It's not clear whether this was ever build/tested/used, so this is no longer +; exposed to the user. +;m32 +;Target Report RejectNegative InverseMask(ABI64) +;Generate code for a 32-bit ABI. m64 Target Report RejectNegative Mask(ABI64) @@ -37,7 +39,7 @@ msoft-stack Target Report Mask(SOFT_STACK) Use custom stacks instead of local memory for automatic storage. -msoft-stack-reserve-local +msoft-stack-reserve-local= Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128) Specify size of .local memory used for stack when the exact amount is not known. @@ -59,6 +61,7 @@ Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30) EnumValue Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35) +; Default needs to be in sync with default in ASM_SPEC in nvptx.h. misa= -Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM30) +Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35) Specify the version of the ptx ISA to use. diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h index 794bf8e..2820720 100644 --- a/gcc/config/pa/pa-hpux11.h +++ b/gcc/config/pa/pa-hpux11.h @@ -154,11 +154,6 @@ along with GCC; see the file COPYING3. If not see %{!mt:%{!pthread:-a shared -lc -a archive}}}}\ %{shared:%{mt|pthread:-lpthread}}" -/* The libgcc_stub.a library needs to come last. */ -#undef LINK_GCC_C_SEQUENCE_SPEC -#define LINK_GCC_C_SEQUENCE_SPEC \ - "%G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}}}" - #undef STARTFILE_SPEC #define STARTFILE_SPEC \ "%{!shared:%{pg:gcrt0%O%s}%{!pg:%{p:mcrt0%O%s}%{!p:crt0%O%s}} \ diff --git a/gcc/config/pa/pa32-linux.h b/gcc/config/pa/pa32-linux.h index f271bbf..970722a 100644 --- a/gcc/config/pa/pa32-linux.h +++ b/gcc/config/pa/pa32-linux.h @@ -57,6 +57,11 @@ call_ ## FUNC (void) \ } #endif +/* We need to link against libgcc.a for __canonicalize_funcptr_for_compare + and $$dyncall. 
*/ +#undef ENDFILE_SPEC +#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC "libgcc.a%s" + #undef WCHAR_TYPE #define WCHAR_TYPE "long int" diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h index c7d127f7..096aa4b 100644 --- a/gcc/config/pa/pa64-hpux.h +++ b/gcc/config/pa/pa64-hpux.h @@ -103,12 +103,6 @@ along with GCC; see the file COPYING3. If not see %{shared:%{mt|pthread:-lpthread}}" #endif -/* The libgcc_stub.a and milli.a libraries need to come last. */ -#undef LINK_GCC_C_SEQUENCE_SPEC -#define LINK_GCC_C_SEQUENCE_SPEC "\ - %G %{!nolibc:%L} %G %{!nostdlib:%{!nodefaultlibs:%{!shared:-lgcc_stub}\ - milli.a%s}}" - /* Under hpux11, the normal location of the `ld' and `as' programs is the /usr/ccs/bin directory. */ @@ -335,8 +329,12 @@ do { \ %{static:crtbeginT%O%s} %{!static:%{!shared:crtbegin%O%s} \ %{shared:crtbeginS%O%s}}" #endif + +/* The libgcc_stub.a and milli.a libraries must come last. We need + to link with these libraries whenever start files are needed. */ #undef ENDFILE_SPEC -#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" +#define ENDFILE_SPEC \ + "%{!shared:crtend%O%s libgcc_stub.a%s} %{shared:crtendS%O%s} milli.a%s" /* Since HP uses the .init and .fini sections for array initializers and finalizers, we need different defines for INIT_SECTION_ASM_OP diff --git a/gcc/config/riscv/multilib-generator b/gcc/config/riscv/multilib-generator index 8f4df18..57ee7c3 100755 --- a/gcc/config/riscv/multilib-generator +++ b/gcc/config/riscv/multilib-generator @@ -22,14 +22,26 @@ # Each argument to this script is of the form # <primary arch>-<abi>-<additional arches>-<extensions> -# For example, +# Example 1: # rv32imafd-ilp32d-rv32g-c,v # means that, in addition to rv32imafd, these configurations can also use the # rv32imafd-ilp32d libraries: rv32imafdc, rv32imafdv, rv32g, rv32gc, rv32gv +# +# Example 2: +# rv32imafd-ilp32d--c*b +# means that, in addition to rv32imafd, these configurations can also use the +# rv32imafd-ilp32d libraries: rv32imafdc-ilp32d, rv32imafdb-ilp32d, +# rv32imafdcb-ilp32d from __future__ import print_function import sys import collections +import itertools +from functools import reduce + +# +# TODO: Add test for this script. +# arches = collections.OrderedDict() abis = collections.OrderedDict() @@ -37,37 +49,53 @@ required = [] reuse = [] canonical_order = "mafdgqlcbjtpvn" +LONG_EXT_PREFIXES = ['z', 's', 'h', 'x'] + +# +# IMPLIED_EXT(ext) -> implied extension list. +# +IMPLIED_EXT = { + "d" : ["f"], +} def arch_canonicalize(arch): - # TODO: Support implied extensions, e.g. D implied F in latest spec. # TODO: Support extension version. new_arch = "" if arch[:5] in ['rv32e', 'rv32i', 'rv32g', 'rv64i', 'rv64g']: - # TODO: We should expand g to imadzifencei once we support newer spec. + # TODO: We should expand g to imad_zifencei once we support newer spec. new_arch = arch[:5].replace("g", "imafd") else: raise Exception("Unexpected arch: `%s`" % arch[:5]) # Find any Z, S, H or X - long_ext_prefixes = ['z', 's', 'h', 'x'] - long_ext_prefixes_idx = map(lambda x: arch.find(x), long_ext_prefixes) + long_ext_prefixes_idx = map(lambda x: arch.find(x), LONG_EXT_PREFIXES) # Filter out any non-existent index. 
long_ext_prefixes_idx = list(filter(lambda x: x != -1, long_ext_prefixes_idx)) if long_ext_prefixes_idx: first_long_ext_idx = min(long_ext_prefixes_idx) long_exts = arch[first_long_ext_idx:].split("_") - std_exts = arch[5:first_long_ext_idx] + std_exts = list(arch[5:first_long_ext_idx]) else: long_exts = [] - std_exts = arch[5:] + std_exts = list(arch[5:]) + + # + # Handle implied extensions. + # + for ext in std_exts + long_exts: + if ext in IMPLIED_EXT: + implied_exts = IMPLIED_EXT[ext] + for implied_ext in implied_exts: + if implied_ext not in std_exts + long_exts: + long_exts.append(implied_ext) # Single letter extension might appear in the long_exts list, # becasue we just append extensions list to the arch string. - std_exts += "".join(filter(lambda x:len(x) == 1, long_exts)) + std_exts += list(filter(lambda x:len(x) == 1, long_exts)) # Multi-letter extension must be in lexicographic order. - long_exts = sorted(filter(lambda x:len(x) != 1, long_exts)) + long_exts = list(sorted(filter(lambda x:len(x) != 1, long_exts))) # Put extensions in canonical order. for ext in canonical_order: @@ -86,15 +114,98 @@ def arch_canonicalize(arch): new_arch += "_" + "_".join(long_exts) return new_arch +# +# add underline for each multi-char extensions. +# e.g. ["a", "zfh"] -> ["a", "_zfh"] +# +def add_underline_prefix(ext): + for long_ext_prefix in LONG_EXT_PREFIXES: + if ext.startswith(long_ext_prefix): + return "_" + ext + + return ext + +# +# Handle expansion operation. +# +# e.g. "a*b" -> [("a",), ("b",), ("a", "b")] +# "a" -> [("a",)] +# +def _expand_combination(ext): + exts = list(ext.split("*")) + + # No need to expand if there is no `*`. + if len(exts) == 1: + return [(exts[0],)] + + # Add underline to every extension. + # e.g. + # _b * zvamo => _b * _zvamo + exts = list(map(lambda x: '_' + x, exts)) + + # Generate combination! + ext_combs = [] + for comb_len in range(1, len(exts)+1): + for ext_comb in itertools.combinations(exts, comb_len): + ext_combs.append(ext_comb) + + return ext_combs + +# +# Input a list and drop duplicated entry. +# e.g. +# ["a", "b", "ab", "a"] -> ["a", "b", "ab"] +# +def unique(x): + # + # Drop duplicated entry. + # Convert list to set and then convert back to list. + # + # Add sorted to prevent non-deterministic results in different env. + # + return list(sorted(list(set(x)))) + +# +# Expand EXT string if there is any expansion operator (*). +# e.g. +# "a*b,c" -> ["a", "b", "ab", "c"] +# +def expand_combination(ext): + ext = list(filter(None, ext.split(','))) + + # Expand combination for EXT, got lots of list. + # e.g. + # a * b => [[("a",), ("b",)], [("a", "b")]] + ext_combs = list(map(_expand_combination, ext)) + + # Then fold to single list. + # e.g. + # [[("a",), ("b",)], [("a", "b")]] => [("a",), ("b",), ("a", "b")] + ext = list(reduce(lambda x, y: x + y, ext_combs, [])) + + # Fold the tuple to string. + # e.g. + # [("a",), ("b",), ("a", "b")] => ["a", "b", "ab"] + ext = map(lambda e : reduce(lambda x, y: x + y, e), ext) + + # Drop duplicated entry. + ext = unique(ext) + + return ext + for cfg in sys.argv[1:]: (arch, abi, extra, ext) = cfg.split('-') arch = arch_canonicalize (arch) arches[arch] = 1 abis[abi] = 1 extra = list(filter(None, extra.split(','))) - ext = list(filter(None, ext.split(','))) - alts = sum([[x] + [x + "_" + y for y in ext] for x in [arch] + extra], []) + ext_combs = expand_combination(ext) + alts = sum([[x] + [x + y for y in ext_combs] for x in [arch] + extra], []) alts = list(map(arch_canonicalize, alts)) + + # Drop duplicated entry. 
+ alts = unique(alts) + for alt in alts[1:]: arches[alt] = 1 reuse.append('march.%s/mabi.%s=march.%s/mabi.%s' % (arch, abi, alt, abi)) diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c index 735f2f2..c600badb 100644 --- a/gcc/config/riscv/riscv-c.c +++ b/gcc/config/riscv/riscv-c.c @@ -90,12 +90,15 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile) builtin_define ("__riscv_cmodel_medlow"); break; + case CM_PIC: + /* __riscv_cmodel_pic is deprecated and will be removed in the next GCC release; see https://github.com/riscv/riscv-c-api-doc/pull/11 */ + builtin_define ("__riscv_cmodel_pic"); + /* FALLTHROUGH. */ + case CM_MEDANY: builtin_define ("__riscv_cmodel_medany"); break; - case CM_PIC: - builtin_define ("__riscv_cmodel_pic"); - break; } } diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def new file mode 100644 index 0000000..6a13f3e --- /dev/null +++ b/gcc/config/riscv/riscv-cores.def @@ -0,0 +1,49 @@ +/* List of supported core and tune info for RISC-V. + Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +/* This is a list of cores that implement RISC-V. + + Before using #include to read this file, define a macro: + + RISCV_CORE(CORE_NAME, ARCH, MICRO_ARCH, TUNE_INFO) + + The CORE_NAME is the name of the core, represented as a string. + The ARCH is the default arch of the core, represented as a string; + it can be NULL if there is no default arch. + The MICRO_ARCH is the name of the core for which scheduling decisions + will be made, represented as an identifier. + The TUNE_INFO is the detailed cost model for this core, represented as an + identifier; see riscv-tunes.def. */ + +RISCV_CORE("sifive-e20", "rv32imc", "rocket") +RISCV_CORE("sifive-e21", "rv32imac", "rocket") +RISCV_CORE("sifive-e24", "rv32imafc", "rocket") +RISCV_CORE("sifive-e31", "rv32imac", "sifive-3-series") +RISCV_CORE("sifive-e34", "rv32imafc", "sifive-3-series") +RISCV_CORE("sifive-e76", "rv32imafc", "sifive-7-series") + +RISCV_CORE("sifive-s21", "rv64imac", "rocket") +RISCV_CORE("sifive-s51", "rv64imac", "sifive-5-series") +RISCV_CORE("sifive-s54", "rv64imafdc", "sifive-5-series") +RISCV_CORE("sifive-s76", "rv64imafdc", "sifive-7-series") + +RISCV_CORE("sifive-u54", "rv64imafdc", "sifive-5-series") +RISCV_CORE("sifive-u74", "rv64imafdc", "sifive-7-series") + +#undef RISCV_CORE diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 358224a..256dab1 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -94,4 +94,18 @@ extern bool riscv_hard_regno_rename_ok (unsigned, unsigned); rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); +/* Information about one CPU we know about. */ +struct riscv_cpu_info { + /* This CPU's canonical name. */ + const char *name; + + /* Default arch for this CPU; may be NULL if there is no default arch.
*/ + const char *arch; + + /* Which automaton to use for tuning. */ + const char *tune; +}; + +extern const riscv_cpu_info *riscv_find_cpu (const char *); + #endif /* ! GCC_RISCV_PROTOS_H */ diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index 63b0c38..989a9f1 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -209,7 +209,7 @@ struct riscv_integer_op { /* Costs of various operations on the different architectures. */ -struct riscv_tune_info +struct riscv_tune_param { unsigned short fp_add[2]; unsigned short fp_mul[2]; @@ -222,16 +222,16 @@ struct riscv_tune_info bool slow_unaligned_access; }; -/* Information about one CPU we know about. */ -struct riscv_cpu_info { - /* This CPU's canonical name. */ +/* Information about one micro-arch we know about. */ +struct riscv_tune_info { + /* This micro-arch canonical name. */ const char *name; /* Which automaton to use for tuning. */ enum riscv_microarchitecture_type microarchitecture; - /* Tuning parameters for this CPU. */ - const struct riscv_tune_info *tune_info; + /* Tuning parameters for this micro-arch. */ + const struct riscv_tune_param *tune_param; }; /* Global variables for machine-dependent things. */ @@ -248,7 +248,7 @@ unsigned riscv_stack_boundary; static int epilogue_cfa_sp_offset; /* Which tuning parameters to use. */ -static const struct riscv_tune_info *tune_info; +static const struct riscv_tune_param *tune_param; /* Which automaton to use for tuning. */ enum riscv_microarchitecture_type riscv_microarchitecture; @@ -275,7 +275,7 @@ const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = { }; /* Costs to use when optimizing for rocket. */ -static const struct riscv_tune_info rocket_tune_info = { +static const struct riscv_tune_param rocket_tune_info = { {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ @@ -288,7 +288,7 @@ static const struct riscv_tune_info rocket_tune_info = { }; /* Costs to use when optimizing for Sifive 7 Series. */ -static const struct riscv_tune_info sifive_7_tune_info = { +static const struct riscv_tune_param sifive_7_tune_info = { {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */ {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ @@ -301,7 +301,7 @@ static const struct riscv_tune_info sifive_7_tune_info = { }; /* Costs to use when optimizing for size. */ -static const struct riscv_tune_info optimize_size_tune_info = { +static const struct riscv_tune_param optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */ {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_mul */ {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_div */ @@ -343,7 +343,7 @@ static const unsigned gpr_save_reg_order[] = { }; /* A table describing all the processors GCC knows about. */ -static const struct riscv_cpu_info riscv_cpu_info_table[] = { +static const struct riscv_tune_info riscv_tune_info_table[] = { { "rocket", generic, &rocket_tune_info }, { "sifive-3-series", generic, &rocket_tune_info }, { "sifive-5-series", generic, &rocket_tune_info }, @@ -351,17 +351,22 @@ static const struct riscv_cpu_info riscv_cpu_info_table[] = { { "size", generic, &optimize_size_tune_info }, }; -/* Return the riscv_cpu_info entry for the given name string. */ +/* Return the riscv_tune_info entry for the given name string. 
*/ -static const struct riscv_cpu_info * -riscv_parse_cpu (const char *cpu_string) +static const struct riscv_tune_info * +riscv_parse_tune (const char *tune_string) { - for (unsigned i = 0; i < ARRAY_SIZE (riscv_cpu_info_table); i++) - if (strcmp (riscv_cpu_info_table[i].name, cpu_string) == 0) - return riscv_cpu_info_table + i; + const riscv_cpu_info *cpu = riscv_find_cpu (tune_string); - error ("unknown cpu %qs for %<-mtune%>", cpu_string); - return riscv_cpu_info_table; + if (cpu) + tune_string = cpu->tune; + + for (unsigned i = 0; i < ARRAY_SIZE (riscv_tune_info_table); i++) + if (strcmp (riscv_tune_info_table[i].name, tune_string) == 0) + return riscv_tune_info_table + i; + + error ("unknown cpu %qs for %<-mtune%>", tune_string); + return riscv_tune_info_table; } /* Helper function for riscv_build_integer; arguments are as for @@ -1703,7 +1708,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN instructions it needs. */ if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0) { - *total = COSTS_N_INSNS (cost + tune_info->memory_cost); + *total = COSTS_N_INSNS (cost + tune_param->memory_cost); return true; } /* Otherwise use the default handling. */ @@ -1770,7 +1775,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN mode instead. */ mode = GET_MODE (XEXP (x, 0)); if (float_mode_p) - *total = tune_info->fp_add[mode == DFmode]; + *total = tune_param->fp_add[mode == DFmode]; else *total = riscv_binary_cost (x, 1, 3); return false; @@ -1779,19 +1784,19 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case ORDERED: /* (FEQ(A, A) & FEQ(B, B)) compared against 0. */ mode = GET_MODE (XEXP (x, 0)); - *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2); + *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2); return false; case UNEQ: /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B). */ mode = GET_MODE (XEXP (x, 0)); - *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3); + *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (3); return false; case LTGT: /* (FLT(A, A) || FGT(B, B)). */ mode = GET_MODE (XEXP (x, 0)); - *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2); + *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (2); return false; case UNGE: @@ -1800,13 +1805,13 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case UNLT: /* FLT or FLE, but guarded by an FFLAGS read and write. 
*/ mode = GET_MODE (XEXP (x, 0)); - *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4); + *total = tune_param->fp_add[mode == DFmode] + COSTS_N_INSNS (4); return false; case MINUS: case PLUS: if (float_mode_p) - *total = tune_info->fp_add[mode == DFmode]; + *total = tune_param->fp_add[mode == DFmode]; else *total = riscv_binary_cost (x, 1, 4); return false; @@ -1816,7 +1821,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN rtx op = XEXP (x, 0); if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode)) { - *total = (tune_info->fp_mul[mode == DFmode] + *total = (tune_param->fp_mul[mode == DFmode] + set_src_cost (XEXP (op, 0), mode, speed) + set_src_cost (XEXP (op, 1), mode, speed) + set_src_cost (XEXP (op, 2), mode, speed)); @@ -1825,23 +1830,23 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN } if (float_mode_p) - *total = tune_info->fp_add[mode == DFmode]; + *total = tune_param->fp_add[mode == DFmode]; else *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1); return false; case MULT: if (float_mode_p) - *total = tune_info->fp_mul[mode == DFmode]; + *total = tune_param->fp_mul[mode == DFmode]; else if (!TARGET_MUL) /* Estimate the cost of a library call. */ *total = COSTS_N_INSNS (speed ? 32 : 6); else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) - *total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2); + *total = 3 * tune_param->int_mul[0] + COSTS_N_INSNS (2); else if (!speed) *total = COSTS_N_INSNS (1); else - *total = tune_info->int_mul[mode == DImode]; + *total = tune_param->int_mul[mode == DImode]; return false; case DIV: @@ -1849,7 +1854,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case MOD: if (float_mode_p) { - *total = tune_info->fp_div[mode == DFmode]; + *total = tune_param->fp_div[mode == DFmode]; return false; } /* Fall through. */ @@ -1860,7 +1865,7 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN /* Estimate the cost of a library call. */ *total = COSTS_N_INSNS (speed ? 32 : 6); else if (speed) - *total = tune_info->int_div[mode == DImode]; + *total = tune_param->int_div[mode == DImode]; else *total = COSTS_N_INSNS (1); return false; @@ -1882,11 +1887,11 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN case FIX: case FLOAT_EXTEND: case FLOAT_TRUNCATE: - *total = tune_info->fp_add[mode == DFmode]; + *total = tune_param->fp_add[mode == DFmode]; return false; case FMA: - *total = (tune_info->fp_mul[mode == DFmode] + *total = (tune_param->fp_mul[mode == DFmode] + set_src_cost (XEXP (x, 0), mode, speed) + set_src_cost (XEXP (x, 1), mode, speed) + set_src_cost (XEXP (x, 2), mode, speed)); @@ -4546,7 +4551,7 @@ riscv_class_max_nregs (reg_class_t rclass, machine_mode mode) static int riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) { - return (tune_info->memory_cost + return (tune_param->memory_cost + memory_move_secondary_cost (mode, rclass, in)); } @@ -4555,7 +4560,7 @@ riscv_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in) static int riscv_issue_rate (void) { - return tune_info->issue_rate; + return tune_param->issue_rate; } /* Auxiliary function to emit RISC-V ELF attribute. 
*/ @@ -4683,7 +4688,7 @@ riscv_init_machine_status (void) static void riscv_option_override (void) { - const struct riscv_cpu_info *cpu; + const struct riscv_tune_info *cpu; #ifdef SUBTARGET_OVERRIDE_OPTIONS SUBTARGET_OVERRIDE_OPTIONS; @@ -4705,26 +4710,28 @@ riscv_option_override (void) if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0) target_flags |= MASK_FDIV; - /* Handle -mtune. */ - cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string : - RISCV_TUNE_STRING_DEFAULT); + /* Handle -mtune, use -mcpu if -mtune is not given, and use the default -mtune + if neither -mtune nor -mcpu is given. */ + cpu = riscv_parse_tune (riscv_tune_string ? riscv_tune_string : + (riscv_cpu_string ? riscv_cpu_string : + RISCV_TUNE_STRING_DEFAULT)); riscv_microarchitecture = cpu->microarchitecture; - tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info; + tune_param = optimize_size ? &optimize_size_tune_info : cpu->tune_param; /* Use -mtune's setting for slow_unaligned_access, even when optimizing for size. For architectures that trap and emulate unaligned accesses, the performance cost is too great, even for -Os. Similarly, if -m[no-]strict-align is left unspecified, heed -mtune's advice. */ - riscv_slow_unaligned_access_p = (cpu->tune_info->slow_unaligned_access + riscv_slow_unaligned_access_p = (cpu->tune_param->slow_unaligned_access || TARGET_STRICT_ALIGN); if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0 - && cpu->tune_info->slow_unaligned_access) + && cpu->tune_param->slow_unaligned_access) target_flags |= MASK_STRICT_ALIGN; /* If the user hasn't specified a branch cost, use the processor's default. */ if (riscv_branch_cost == 0) - riscv_branch_cost = tune_info->branch_cost; + riscv_branch_cost = tune_param->branch_cost; /* Function to allocate machine-dependent function status. */ init_machine_status = &riscv_init_machine_status; diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index b7b4a1c..172c7ca 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -41,17 +41,27 @@ along with GCC; see the file COPYING3. If not see #endif extern const char *riscv_expand_arch (int argc, const char **argv); +extern const char *riscv_expand_arch_from_cpu (int argc, const char **argv); +extern const char *riscv_default_mtune (int argc, const char **argv); # define EXTRA_SPEC_FUNCTIONS \ - { "riscv_expand_arch", riscv_expand_arch }, + { "riscv_expand_arch", riscv_expand_arch }, \ + { "riscv_expand_arch_from_cpu", riscv_expand_arch_from_cpu }, \ + { "riscv_default_mtune", riscv_default_mtune }, /* Support for a compile-time default CPU, et cetera. The rules are: - --with-arch is ignored if -march is specified. + --with-arch is ignored if -march or -mcpu is specified. --with-abi is ignored if -mabi is specified. - --with-tune is ignored if -mtune is specified. */ + --with-tune is ignored if -mtune or -mcpu is specified. + + But the default -march/-mtune value is used if -mcpu does not have a valid option.
*/ #define OPTION_DEFAULT_SPECS \ - {"tune", "%{!mtune=*:-mtune=%(VALUE)}" }, \ - {"arch", "%{!march=*:-march=%(VALUE)}" }, \ + {"tune", "%{!mtune=*:" \ + " %{!mcpu=*:-mtune=%(VALUE)}" \ + " %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \ + {"arch", "%{!march=*:" \ + " %{!mcpu=*:-march=%(VALUE)}" \ + " %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \ {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ #ifdef IN_LIBGCC2 @@ -69,8 +79,9 @@ extern const char *riscv_expand_arch (int argc, const char **argv); %(subtarget_asm_spec)" #undef DRIVER_SELF_SPECS -#define DRIVER_SELF_SPECS \ -"%{march=*:-march=%:riscv_expand_arch(%*)}" +#define DRIVER_SELF_SPECS \ +"%{march=*:%:riscv_expand_arch(%*)} " \ +"%{!march=*:%{mcpu=*:%:riscv_expand_arch_from_cpu(%*)}} " #define TARGET_DEFAULT_CMODEL CM_MEDLOW diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index f01d3ab..808b4a0 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -79,6 +79,10 @@ mtune= Target RejectNegative Joined Var(riscv_tune_string) -mtune=PROCESSOR Optimize the output for PROCESSOR. +mcpu= +Target RejectNegative Joined Var(riscv_cpu_string) +-mcpu=PROCESSOR Use architecture of and optimize the output for PROCESSOR. + msmall-data-limit= Target Joined Separate UInteger Var(g_switch_value) Init(8) -msmall-data-limit=N Put global and static data smaller than <number> bytes into a special section (on some targets). diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 4820fb3..702767c 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -24,3 +24,5 @@ riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.c $(POSTCOMPILE) PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def + +$(common_out_file): $(srcdir)/config/riscv/riscv-cores.def diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 8a2dcda..df10a8c 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -236,6 +236,9 @@ #define vec_lvebx __builtin_vec_lvebx #define vec_lvehx __builtin_vec_lvehx #define vec_lvewx __builtin_vec_lvewx +#define vec_xl_zext __builtin_vec_ze_lxvrx +#define vec_xl_sext __builtin_vec_se_lxvrx +#define vec_xst_trunc __builtin_vec_tr_stxvrx #define vec_neg __builtin_vec_neg #define vec_pmsum_be __builtin_vec_vpmsum #define vec_shasigma_be __builtin_crypto_vshasigma diff --git a/gcc/config/rs6000/freebsd64.h b/gcc/config/rs6000/freebsd64.h index c991363..6984ca5 100644 --- a/gcc/config/rs6000/freebsd64.h +++ b/gcc/config/rs6000/freebsd64.h @@ -78,65 +78,7 @@ extern int dot_symbols; #undef SUBSUBTARGET_OVERRIDE_OPTIONS #define SUBSUBTARGET_OVERRIDE_OPTIONS \ - do \ - { \ - if (!global_options_set.x_rs6000_alignment_flags) \ - rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ - if (TARGET_64BIT) \ - { \ - if (DEFAULT_ABI != ABI_AIX) \ - { \ - rs6000_current_abi = ABI_AIX; \ - error (INVALID_64BIT, "call"); \ - } \ - dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ - if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ - error (INVALID_64BIT, "relocatable"); \ - } \ - if (ELFv2_ABI_CHECK) \ - { \ - rs6000_current_abi = ABI_ELFv2; \ - if (dot_symbols) \ - error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_EABI) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_EABI; \ - error (INVALID_64BIT, "eabi"); \ - } \ - if (TARGET_PROTOTYPE) \ - { \ - target_prototype = 0; \ - error (INVALID_64BIT, "prototype"); \ - } \ - 
if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ - { \ - rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ - error ("%<-m64%> requires a PowerPC64 cpu"); \ - } \ - if ((rs6000_isa_flags_explicit \ - & OPTION_MASK_MINIMAL_TOC) != 0) \ - { \ - if (global_options_set.x_rs6000_current_cmodel \ - && rs6000_current_cmodel != CMODEL_SMALL) \ - error ("%<-mcmodel%> incompatible with other toc options"); \ - SET_CMODEL (CMODEL_SMALL); \ - } \ - else \ - { \ - if (!global_options_set.x_rs6000_current_cmodel) \ - SET_CMODEL (CMODEL_MEDIUM); \ - if (rs6000_current_cmodel != CMODEL_SMALL) \ - { \ - TARGET_NO_FP_IN_TOC = 0; \ - TARGET_NO_SUM_IN_TOC = 0; \ - } \ - } \ - } \ - } \ - while (0) + do rs6000_linux64_override_options (); while (0) #undef ASM_SPEC #undef LINK_OS_FREEBSD_SPEC diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h index 2ded330..73b6c01 100644 --- a/gcc/config/rs6000/linux64.h +++ b/gcc/config/rs6000/linux64.h @@ -96,90 +96,7 @@ extern int dot_symbols; #undef SUBSUBTARGET_OVERRIDE_OPTIONS #define SUBSUBTARGET_OVERRIDE_OPTIONS \ - do \ - { \ - if (!global_options_set.x_rs6000_alignment_flags) \ - rs6000_alignment_flags = MASK_ALIGN_NATURAL; \ - if (rs6000_isa_flags & OPTION_MASK_64BIT) \ - { \ - if (DEFAULT_ABI != ABI_AIX) \ - { \ - rs6000_current_abi = ABI_AIX; \ - error (INVALID_64BIT, "call"); \ - } \ - dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); \ - if (ELFv2_ABI_CHECK) \ - { \ - rs6000_current_abi = ABI_ELFv2; \ - if (dot_symbols) \ - error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; \ - error (INVALID_64BIT, "relocatable"); \ - } \ - if (rs6000_isa_flags & OPTION_MASK_EABI) \ - { \ - rs6000_isa_flags &= ~OPTION_MASK_EABI; \ - error (INVALID_64BIT, "eabi"); \ - } \ - if (TARGET_PROTOTYPE) \ - { \ - target_prototype = 0; \ - error (INVALID_64BIT, "prototype"); \ - } \ - if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) \ - { \ - rs6000_isa_flags |= OPTION_MASK_POWERPC64; \ - error ("%<-m64%> requires a PowerPC64 cpu"); \ - } \ - if ((rs6000_isa_flags_explicit \ - & OPTION_MASK_MINIMAL_TOC) != 0) \ - { \ - if (global_options_set.x_rs6000_current_cmodel \ - && rs6000_current_cmodel != CMODEL_SMALL) \ - error ("%<-mcmodel incompatible with other toc options%>"); \ - SET_CMODEL (CMODEL_SMALL); \ - } \ - else \ - { \ - if (!global_options_set.x_rs6000_current_cmodel) \ - SET_CMODEL (CMODEL_MEDIUM); \ - if (rs6000_current_cmodel != CMODEL_SMALL) \ - { \ - if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \ - TARGET_NO_FP_IN_TOC \ - = rs6000_current_cmodel == CMODEL_MEDIUM; \ - if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \ - TARGET_NO_SUM_IN_TOC = 0; \ - } \ - } \ - if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) \ - { \ - if (global_options_set.x_rs6000_pltseq) \ - warning (0, "%qs unsupported for this ABI", \ - "-mpltseq"); \ - rs6000_pltseq = false; \ - } \ - } \ - else \ - { \ - if (!RS6000_BI_ARCH_P) \ - error (INVALID_32BIT, "32"); \ - if (TARGET_PROFILE_KERNEL) \ - { \ - TARGET_PROFILE_KERNEL = 0; \ - error (INVALID_32BIT, "profile-kernel"); \ - } \ - if (global_options_set.x_rs6000_current_cmodel) \ - { \ - SET_CMODEL (CMODEL_SMALL); \ - error (INVALID_32BIT, "cmodel"); \ - } \ - } \ - } \ - while (0) + do rs6000_linux64_override_options (); while (0) #undef ASM_SPEC #undef LINK_OS_LINUX_SPEC diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h index 48edc99..e0bce9c 100644 --- 
a/gcc/config/rs6000/ppc-asm.h +++ b/gcc/config/rs6000/ppc-asm.h @@ -262,6 +262,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #undef toc #define FUNC_NAME(name) GLUE(__USER_LABEL_PREFIX__,name) +#ifdef __PCREL__ +#define JUMP_TARGET(name) GLUE(FUNC_NAME(name),@notoc) +#define FUNC_START(name) \ + .type FUNC_NAME(name),@function; \ + .globl FUNC_NAME(name); \ +FUNC_NAME(name): \ + .localentry FUNC_NAME(name),1 +#else #define JUMP_TARGET(name) FUNC_NAME(name) #define FUNC_START(name) \ .type FUNC_NAME(name),@function; \ @@ -270,6 +278,7 @@ FUNC_NAME(name): \ 0: addis 2,12,(.TOC.-0b)@ha; \ addi 2,2,(.TOC.-0b)@l; \ .localentry FUNC_NAME(name),.-FUNC_NAME(name) +#endif /* !__PCREL__ */ #define HIDDEN_FUNC(name) \ FUNC_START(name) \ diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index e91a48d..5b05da8 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -1111,7 +1111,7 @@ | RS6000_BTC_UNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ -#define BU_P10_MISC_2(ENUM, NAME, ATTR, ICODE) \ +#define BU_P10_POWERPC64_MISC_2(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_2 (P10_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_" NAME, /* NAME */ \ RS6000_BTM_P10 \ @@ -1145,6 +1145,14 @@ CODE_FOR_ ## ICODE) /* ICODE */ #endif +#define BU_P10V_OVERLOAD_X(ENUM, NAME) \ + RS6000_BUILTIN_X (P10_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P10, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + /* Power 10 Altivec builtins */ #define BU_P10V_AV_0(ENUM, NAME, ATTR, ICODE) \ @@ -1179,6 +1187,15 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_P10V_AV_X(ENUM, NAME, ATTR) \ + RS6000_BUILTIN_X (P10_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P10, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_SPECIAL), \ + CODE_FOR_nothing) /* ICODE */ + + /* Insure 0 is not a legitimate index. */ BU_SPECIAL_X (RS6000_BUILTIN_NONE, NULL, 0, RS6000_BTC_MISC) @@ -1474,6 +1491,18 @@ BU_ALTIVEC_X (LVSR, "lvsr", PURE) BU_ALTIVEC_X (LVEBX, "lvebx", PURE) BU_ALTIVEC_X (LVEHX, "lvehx", PURE) BU_ALTIVEC_X (LVEWX, "lvewx", PURE) +BU_P10V_AV_X (SE_LXVRBX, "se_lxvrbx", PURE) +BU_P10V_AV_X (SE_LXVRHX, "se_lxvrhx", PURE) +BU_P10V_AV_X (SE_LXVRWX, "se_lxvrwx", PURE) +BU_P10V_AV_X (SE_LXVRDX, "se_lxvrdx", PURE) +BU_P10V_AV_X (ZE_LXVRBX, "ze_lxvrbx", PURE) +BU_P10V_AV_X (ZE_LXVRHX, "ze_lxvrhx", PURE) +BU_P10V_AV_X (ZE_LXVRWX, "ze_lxvrwx", PURE) +BU_P10V_AV_X (ZE_LXVRDX, "ze_lxvrdx", PURE) +BU_P10V_AV_X (TR_STXVRBX, "tr_stxvrbx", MEM) +BU_P10V_AV_X (TR_STXVRHX, "tr_stxvrhx", MEM) +BU_P10V_AV_X (TR_STXVRWX, "tr_stxvrwx", MEM) +BU_P10V_AV_X (TR_STXVRDX, "tr_stxvrdx", MEM) BU_ALTIVEC_X (LVXL, "lvxl", PURE) BU_ALTIVEC_X (LVXL_V2DF, "lvxl_v2df", PURE) BU_ALTIVEC_X (LVXL_V2DI, "lvxl_v2di", PURE) @@ -1740,6 +1769,9 @@ BU_ALTIVEC_OVERLOAD_X (LDL, "ldl") BU_ALTIVEC_OVERLOAD_X (LVEBX, "lvebx") BU_ALTIVEC_OVERLOAD_X (LVEHX, "lvehx") BU_ALTIVEC_OVERLOAD_X (LVEWX, "lvewx") +BU_P10V_OVERLOAD_X (SE_LXVRX, "se_lxvrx") +BU_P10V_OVERLOAD_X (ZE_LXVRX, "ze_lxvrx") +BU_P10V_OVERLOAD_X (TR_STXVRX, "tr_stxvrx") BU_ALTIVEC_OVERLOAD_X (LVLX, "lvlx") BU_ALTIVEC_OVERLOAD_X (LVLXL, "lvlxl") BU_ALTIVEC_OVERLOAD_X (LVRX, "lvrx") @@ -2727,11 +2759,11 @@ BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") /* Builtins for scalar instructions added in ISA 3.1 (power10). 
*/ -BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged) -BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm) -BU_P10_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm) -BU_P10_MISC_2 (PDEPD, "pdepd", CONST, pdepd) -BU_P10_MISC_2 (PEXTD, "pextd", CONST, pextd) +BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged) +BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm) +BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm) +BU_P10_POWERPC64_MISC_2 (PDEPD, "pdepd", CONST, pdepd) +BU_P10_POWERPC64_MISC_2 (PEXTD, "pextd", CONST, pextd) /* Builtins for vector instructions added in ISA 3.1 (power10). */ BU_P10V_AV_2 (VCLRLB, "vclrlb", CONST, vclrlb) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index f5982907..cc1e997 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -597,6 +597,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags, /* Tell the user if we support the MMA instructions. */ if ((flags & OPTION_MASK_MMA) != 0) rs6000_define_or_undefine_macro (define_p, "__MMA__"); + /* Whether pc-relative code is being generated. */ + if ((flags & OPTION_MASK_PCREL) != 0) + rs6000_define_or_undefine_macro (define_p, "__PCREL__"); } void diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index a8b52083..b044778 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -57,16 +57,14 @@ #include "gimplify.h" #include "gimple-fold.h" #include "gimple-iterator.h" -#include "gimple-ssa.h" +#include "ssa.h" +#include "tree-ssa-propagate.h" #include "builtins.h" #include "tree-vector-builder.h" #if TARGET_XCOFF #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ #endif #include "ppc-auxv.h" -#include "tree-ssa-propagate.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" #include "targhooks.h" #include "opts.h" @@ -1154,6 +1152,65 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_LVEBX, ALTIVEC_BUILTIN_LVEBX, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + /* vector signed__int128 vec_xl_sext (signed long long, signed char *); + vector signed__int128 vec_xl_sext (signed long long, signed short *); + vector signed__int128 vec_xl_sext (signed long long, signed int *); + vector signed__int128 vec_xl_sext (signed long long, signed longlong *); */ + { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRBX, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRHX, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRWX, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 }, + { P10_BUILTIN_VEC_SE_LXVRX, P10_BUILTIN_SE_LXVRDX, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + + /* vector unsigned__int128 vec_xl_zext (signed long long, unsigned char *); + vector unsigned__int128 vec_xl_zext (signed long long, unsigned short *); + vector unsigned__int128 vec_xl_zext (signed long long, unsigned int *); + vector unsigned__int128 vec_xl_zext (signed long long, unsigned longlong *); */ + { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRBX, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRHX, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { P10_BUILTIN_VEC_ZE_LXVRX, 
P10_BUILTIN_ZE_LXVRWX, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 }, + { P10_BUILTIN_VEC_ZE_LXVRX, P10_BUILTIN_ZE_LXVRDX, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long, 0 }, + + /* void vec_xst_trunc (vector signed __int128, signed long long, signed char *); + void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *); + void vec_xst_trunc (vector signed __int128, signed long long, signed char *); + void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *); + void vec_xst_trunc (vector signed __int128, signed long long, signed char *); + void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *); + void vec_xst_trunc (vector signed __int128, signed long long, signed char *); + void vec_xst_trunc (vector unsigned __int128, signed long long, unsigned char *); */ + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRBX, RS6000_BTI_void, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRHX, RS6000_BTI_void, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRWX, RS6000_BTI_void, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_long_long }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void, + RS6000_BTI_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI }, + { P10_BUILTIN_VEC_TR_STXVRX, P10_BUILTIN_TR_STXVRDX, RS6000_BTI_void, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI }, + /* vector float vec_ldl (int, vector float *); vector float vec_ldl (int, float *); */ { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL_V4SF, @@ -9576,6 +9633,85 @@ swap_endian_selector_for_mode (machine_mode mode) gen_rtvec_v (16, perm))); } +/* For the load and sign extend rightmost elements; load and zero extend + rightmost element builtins. */ +static rtx +altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool blk, bool sign_extend) +{ + rtx pat, addr; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode smode = insn_data[icode].operand[1].mode; + machine_mode mode0 = Pmode; + machine_mode mode1 = Pmode; + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + + if (icode == CODE_FOR_nothing) + /* Builtin not supported on this processor. */ + return 0; + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + op1 = copy_to_mode_reg (mode1, op1); + + if (op0 == const0_rtx) + addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1); + else + { + op0 = copy_to_mode_reg (mode0, op0); + addr = gen_rtx_MEM (blk ? BLKmode : smode, + gen_rtx_PLUS (Pmode, op1, op0)); + } + + if (sign_extend) + { + rtx discratch = gen_reg_rtx (DImode); + rtx tiscratch = gen_reg_rtx (TImode); + + /* Emit the lxvr*x insn. */ + pat = GEN_FCN (icode) (tiscratch, addr); + if (!pat) + return 0; + emit_insn (pat); + + /* Emit a sign extension from QI,HI,WI to double (DI). */ + rtx scratch = gen_lowpart (smode, tiscratch); + if (icode == CODE_FOR_vsx_lxvrbx) + emit_insn (gen_extendqidi2 (discratch, scratch)); + else if (icode == CODE_FOR_vsx_lxvrhx) + emit_insn (gen_extendhidi2 (discratch, scratch)); + else if (icode == CODE_FOR_vsx_lxvrwx) + emit_insn (gen_extendsidi2 (discratch, scratch)); + /* Assign discratch directly if scratch is already DI. */ + if (icode == CODE_FOR_vsx_lxvrdx) + discratch = scratch; + + /* Emit the sign extension from DI (double) to TI (quad). */ + emit_insn (gen_extendditi2 (target, discratch)); + + return target; + } + else + { + /* Zero extend. */ + pat = GEN_FCN (icode) (target, addr); + if (!pat) + return 0; + emit_insn (pat); + return target; + } + return 0; +} + static rtx altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk) { @@ -9694,7 +9830,7 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp) rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); - rtx pat, addr, rawaddr; + rtx pat, addr, rawaddr, truncrtx; machine_mode tmode = insn_data[icode].operand[0].mode; machine_mode smode = insn_data[icode].operand[1].mode; machine_mode mode1 = Pmode; @@ -9733,6 +9869,25 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp) emit_insn (gen_rtx_SET (addr, op0)); } + else if (icode == CODE_FOR_vsx_stxvrbx + || icode == CODE_FOR_vsx_stxvrhx + || icode == CODE_FOR_vsx_stxvrwx + || icode == CODE_FOR_vsx_stxvrdx) + { + truncrtx = gen_rtx_TRUNCATE (tmode, op0); + op0 = copy_to_mode_reg (E_TImode, truncrtx); + + if (op1 == const0_rtx) + addr = gen_rtx_MEM (Pmode, op2); + else + { + op1 = copy_to_mode_reg (mode1, op1); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1)); + } + pat = GEN_FCN (icode) (addr, op0); + if (pat) + emit_insn (pat); + } else { if (! 
(*insn_data[icode].operand[1].predicate) (op0, smode)) @@ -10752,6 +10907,16 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp); case ALTIVEC_BUILTIN_STVEWX: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp); + + case P10_BUILTIN_TR_STXVRBX: + return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrbx, exp); + case P10_BUILTIN_TR_STXVRHX: + return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrhx, exp); + case P10_BUILTIN_TR_STXVRWX: + return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrwx, exp); + case P10_BUILTIN_TR_STXVRDX: + return altivec_expand_stv_builtin (CODE_FOR_vsx_stxvrdx, exp); + case ALTIVEC_BUILTIN_STVXL_V2DF: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp); case ALTIVEC_BUILTIN_STVXL_V2DI: @@ -11014,6 +11179,30 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_LVEWX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx, exp, target, false); + case P10_BUILTIN_SE_LXVRBX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx, + exp, target, false, true); + case P10_BUILTIN_SE_LXVRHX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx, + exp, target, false, true); + case P10_BUILTIN_SE_LXVRWX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx, + exp, target, false, true); + case P10_BUILTIN_SE_LXVRDX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx, + exp, target, false, true); + case P10_BUILTIN_ZE_LXVRBX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrbx, + exp, target, false, false); + case P10_BUILTIN_ZE_LXVRHX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrhx, + exp, target, false, false); + case P10_BUILTIN_ZE_LXVRWX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrwx, + exp, target, false, false); + case P10_BUILTIN_ZE_LXVRDX: + return altivec_expand_lxvr_builtin (CODE_FOR_vsx_lxvrdx, + exp, target, false, false); case ALTIVEC_BUILTIN_LVXL_V2DF: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df, exp, target, false); @@ -12916,15 +13105,13 @@ rs6000_init_builtins (void) /* Vector pair and vector quad support. 
*/ if (TARGET_EXTRA_BUILTINS) { - tree oi_uns_type = make_unsigned_type (256); - vector_pair_type_node = build_distinct_type_copy (oi_uns_type); + vector_pair_type_node = make_unsigned_type (256); SET_TYPE_MODE (vector_pair_type_node, POImode); layout_type (vector_pair_type_node); lang_hooks.types.register_builtin_type (vector_pair_type_node, "__vector_pair"); - tree xi_uns_type = make_unsigned_type (512); - vector_quad_type_node = build_distinct_type_copy (xi_uns_type); + vector_quad_type_node = make_unsigned_type (512); SET_TYPE_MODE (vector_quad_type_node, PXImode); layout_type (vector_quad_type_node); lang_hooks.types.register_builtin_type (vector_quad_type_node, @@ -13298,6 +13485,18 @@ altivec_init_builtins (void) def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX); def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX); def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX); + def_builtin ("__builtin_altivec_se_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRBX); + def_builtin ("__builtin_altivec_se_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRHX); + def_builtin ("__builtin_altivec_se_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRWX); + def_builtin ("__builtin_altivec_se_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_SE_LXVRDX); + def_builtin ("__builtin_altivec_ze_lxvrbx", v16qi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRBX); + def_builtin ("__builtin_altivec_ze_lxvrhx", v8hi_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRHX); + def_builtin ("__builtin_altivec_ze_lxvrwx", v4si_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRWX); + def_builtin ("__builtin_altivec_ze_lxvrdx", v2di_ftype_long_pcvoid, P10_BUILTIN_ZE_LXVRDX); + def_builtin ("__builtin_altivec_tr_stxvrbx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRBX); + def_builtin ("__builtin_altivec_tr_stxvrhx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRHX); + def_builtin ("__builtin_altivec_tr_stxvrwx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRWX); + def_builtin ("__builtin_altivec_tr_stxvrdx", void_ftype_v1ti_long_pvoid, P10_BUILTIN_TR_STXVRDX); def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL); def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL_V2DF); @@ -13363,6 +13562,9 @@ altivec_init_builtins (void) def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX); def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX); def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX); + def_builtin ("__builtin_vec_se_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_SE_LXVRX); + def_builtin ("__builtin_vec_ze_lxvrx", v1ti_ftype_long_pcvoid, P10_BUILTIN_VEC_ZE_LXVRX); + def_builtin ("__builtin_vec_tr_stxvrx", void_ftype_opaque_long_pvoid, P10_BUILTIN_VEC_TR_STXVRX); def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST); def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE); def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL); diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h index 9caef01..32681b6 100644 --- a/gcc/config/rs6000/rs6000-internal.h +++ b/gcc/config/rs6000/rs6000-internal.h @@ -32,7 +32,7 @@ typedef struct rs6000_stack { int cr_save_p; /* true if the CR reg needs to be saved */ unsigned int vrsave_mask; /* mask of vec 
registers to save */ int push_p; /* true if we need to allocate stack space */ - int calls_p; /* true if the function makes any calls */ + int calls_p; /* true if there are non-sibling calls */ int world_save_p; /* true if we're saving *everything*: r13-r31, cr, f14-f31, vrsave, v20-v31 */ enum rs6000_abi abi; /* which ABI to use */ diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c index 0f88ec1..d90cd57 100644 --- a/gcc/config/rs6000/rs6000-logue.c +++ b/gcc/config/rs6000/rs6000-logue.c @@ -714,7 +714,7 @@ rs6000_stack_info (void) info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save); - /* Does this function call anything? */ + /* Does this function call anything (apart from sibling calls)? */ info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); /* Determine if we need to save the condition code registers. */ @@ -5479,7 +5479,18 @@ rs6000_expand_split_stack_prologue (void) gcc_assert (flag_split_stack && reload_completed); if (!info->push_p) - return; + { + /* We need the -fsplit-stack prologue for functions that make + tail calls. Tail calls don't count against crtl->is_leaf. + Note that we are called inside a sequence. get_insns will + just return that (as yet empty) sequence, so instead we + access the function rtl with get_topmost_sequence. */ + for (insn = get_topmost_sequence ()->first; insn; insn = NEXT_INSN (insn)) + if (CALL_P (insn)) + break; + if (!insn) + return; + } if (global_regs[29]) { diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6f204ca..4d528a3 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -59,11 +59,12 @@ #include "gimplify.h" #include "gimple-fold.h" #include "gimple-iterator.h" -#include "gimple-ssa.h" #include "gimple-walk.h" +#include "ssa.h" +#include "tree-vectorizer.h" +#include "tree-ssa-propagate.h" #include "intl.h" #include "tm-constrs.h" -#include "tree-vectorizer.h" #include "target-globals.h" #include "builtins.h" #include "tree-vector-builder.h" @@ -75,9 +76,6 @@ #endif #include "case-cfn-macros.h" #include "ppc-auxv.h" -#include "tree-ssa-propagate.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" #include "rs6000-internal.h" #include "opts.h" @@ -3451,6 +3449,96 @@ rs6000_override_options_after_change (void) flag_cunroll_grow_size = flag_peel_loops || optimize >= 3; } +#ifdef TARGET_USES_LINUX64_OPT +static void +rs6000_linux64_override_options () +{ + if (!global_options_set.x_rs6000_alignment_flags) + rs6000_alignment_flags = MASK_ALIGN_NATURAL; + if (rs6000_isa_flags & OPTION_MASK_64BIT) + { + if (DEFAULT_ABI != ABI_AIX) + { + rs6000_current_abi = ABI_AIX; + error (INVALID_64BIT, "call"); + } + dot_symbols = !strcmp (rs6000_abi_name, "aixdesc"); + if (ELFv2_ABI_CHECK) + { + rs6000_current_abi = ABI_ELFv2; + if (dot_symbols) + error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>"); + } + if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE) + { + rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE; + error (INVALID_64BIT, "relocatable"); + } + if (rs6000_isa_flags & OPTION_MASK_EABI) + { + rs6000_isa_flags &= ~OPTION_MASK_EABI; + error (INVALID_64BIT, "eabi"); + } + if (TARGET_PROTOTYPE) + { + target_prototype = 0; + error (INVALID_64BIT, "prototype"); + } + if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0) + { + rs6000_isa_flags |= OPTION_MASK_POWERPC64; + error ("%<-m64%> requires a PowerPC64 cpu"); + } + if (!global_options_set.x_rs6000_current_cmodel) + SET_CMODEL (CMODEL_MEDIUM); + if 
((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0) + { + if (global_options_set.x_rs6000_current_cmodel + && rs6000_current_cmodel != CMODEL_SMALL) + error ("%<-mcmodel incompatible with other toc options%>"); + if (TARGET_MINIMAL_TOC) + SET_CMODEL (CMODEL_SMALL); + else if (TARGET_PCREL + || (PCREL_SUPPORTED_BY_OS + && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)) + /* Ignore -mno-minimal-toc. */ + ; + else + SET_CMODEL (CMODEL_SMALL); + } + if (rs6000_current_cmodel != CMODEL_SMALL) + { + if (!global_options_set.x_TARGET_NO_FP_IN_TOC) + TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM; + if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) + TARGET_NO_SUM_IN_TOC = 0; + } + if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2) + { + if (global_options_set.x_rs6000_pltseq) + warning (0, "%qs unsupported for this ABI", + "-mpltseq"); + rs6000_pltseq = false; + } + } + else if (TARGET_64BIT) + error (INVALID_32BIT, "32"); + else + { + if (TARGET_PROFILE_KERNEL) + { + profile_kernel = 0; + error (INVALID_32BIT, "profile-kernel"); + } + if (global_options_set.x_rs6000_current_cmodel) + { + SET_CMODEL (CMODEL_SMALL); + error (INVALID_32BIT, "cmodel"); + } + } +} +#endif + /* Override command line options. Combine build-specific configuration information with options @@ -4236,7 +4324,9 @@ rs6000_option_override_internal (bool global_init_p) } /* Enable Altivec ABI for AIX -maltivec. */ - if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX)) + if (TARGET_XCOFF + && (TARGET_ALTIVEC || TARGET_VSX) + && !global_options_set.x_rs6000_altivec_abi) { if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) error ("target attribute or pragma changes AltiVec ABI"); @@ -5731,7 +5821,7 @@ direct_return (void) /* Helper for num_insns_constant. Calculate number of instructions to load VALUE to a single gpr using combinations of addi, addis, ori, - oris and sldi instructions. */ + oris, sldi and rldimi instructions. */ static int num_insns_constant_gpr (HOST_WIDE_INT value) @@ -5759,7 +5849,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value) high >>= 1; - if (low == 0) + if (low == 0 || low == high) return num_insns_constant_gpr (high) + 1; else if (high == 0) return num_insns_constant_gpr (low) + 1; @@ -8364,7 +8454,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, low_int = 0; high_int = INTVAL (XEXP (x, 1)) - low_int; sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), - GEN_INT (high_int)), 0); + gen_int_mode (high_int, Pmode)), 0); return plus_constant (Pmode, sum, low_int); } else if (GET_CODE (x) == PLUS @@ -9020,15 +9110,21 @@ rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict) bool reg_offset_p = reg_offset_addressing_ok_p (mode); bool quad_offset_p = mode_supports_dq_form (mode); - /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */ + if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) + return 0; + + /* Handle unaligned altivec lvx/stvx type addresses. 
*/ if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && GET_CODE (x) == AND && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == -16) - x = XEXP (x, 0); + { + x = XEXP (x, 0); + return (legitimate_indirect_address_p (x, reg_ok_strict) + || legitimate_indexed_address_p (x, reg_ok_strict) + || virtual_stack_registers_memory_p (x)); + } - if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x)) - return 0; if (legitimate_indirect_address_p (x, reg_ok_strict)) return 1; if (TARGET_UPDATE @@ -21176,9 +21272,9 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, return true; } else if ((outer_code == PLUS - && reg_or_add_cint_operand (x, VOIDmode)) + && reg_or_add_cint_operand (x, mode)) || (outer_code == MINUS - && reg_or_sub_cint_operand (x, VOIDmode)) + && reg_or_sub_cint_operand (x, mode)) || ((outer_code == SET || outer_code == IOR || outer_code == XOR) @@ -26957,11 +27053,10 @@ rs6000_const_f32_to_i32 (rtx operand) void rs6000_emit_xxspltidp_v2df (rtx dst, long value) { - printf("rs6000_emit_xxspltidp_v2df called %ld\n", value); - printf("rs6000_emit_xxspltidp_v2df called 0x%lx\n", value); if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0)) inform (input_location, - "the result for the xxspltidp instruction is undefined for subnormal input values.\n"); + "the result for the xxspltidp instruction " + "is undefined for subnormal input values"); emit_insn( gen_xxspltidp_v2df_inst (dst, GEN_INT (value))); } diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 694ff70..dc06014 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -11554,7 +11554,7 @@ "" { /* Everything is best done with setbc[r] if available. */ - if (TARGET_POWER10) + if (TARGET_POWER10 && TARGET_ISEL) rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx); /* Expanding EQ and NE directly to some machine instructions does not help @@ -12697,12 +12697,7 @@ "" { if (rs6000_speculate_indirect_jumps) - { - if (TARGET_32BIT) - emit_jump_insn (gen_tablejumpsi (operands[0], operands[1])); - else - emit_jump_insn (gen_tablejumpdi (operands[0], operands[1])); - } + emit_jump_insn (gen_tablejump_normal (Pmode, operands[0], operands[1])); else { rtx ccreg = gen_reg_rtx (CCmode); @@ -12716,69 +12711,57 @@ DONE; }) -(define_expand "tablejumpsi" - [(set (match_dup 3) - (plus:SI (match_operand:SI 0) - (match_dup 2))) - (parallel [(set (pc) - (match_dup 3)) - (use (label_ref (match_operand 1)))])] - "TARGET_32BIT && rs6000_speculate_indirect_jumps" +(define_expand "@tablejump<mode>_normal" + [(use (match_operand:SI 0)) + (use (match_operand:P 1))] + "rs6000_speculate_indirect_jumps" { + rtx off; operands[0] = force_reg (SImode, operands[0]); - operands[2] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1])); - operands[3] = gen_reg_rtx (SImode); + if (<MODE>mode == SImode) + off = operands[0]; + else + { + off = gen_reg_rtx (Pmode); + rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); + emit_move_insn (off, src); + } + + rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx addr = gen_reg_rtx (Pmode); + + emit_insn (gen_add<mode>3 (addr, off, lab)); + emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); + DONE; }) -(define_expand "tablejumpsi_nospec" - [(set (match_dup 4) - (plus:SI (match_operand:SI 0) - (match_dup 3))) - (parallel [(set (pc) - (match_dup 4)) - (use (label_ref (match_operand 1))) - (clobber (match_operand 2))])] - "TARGET_32BIT && !rs6000_speculate_indirect_jumps" +(define_expand 
"@tablejump<mode>_nospec" + [(use (match_operand:SI 0)) + (use (match_operand:P 1)) + (use (match_operand:CC 2))] + "!rs6000_speculate_indirect_jumps" { + rtx off; operands[0] = force_reg (SImode, operands[0]); - operands[3] = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[1])); - operands[4] = gen_reg_rtx (SImode); -}) + if (<MODE>mode == SImode) + off = operands[0]; + else + { + off = gen_reg_rtx (Pmode); + rtx src = gen_rtx_fmt_e (SIGN_EXTEND, Pmode, operands[0]); + emit_move_insn (off, src); + } -(define_expand "tablejumpdi" - [(set (match_dup 4) - (sign_extend:DI (match_operand:SI 0 "lwa_operand"))) - (set (match_dup 3) - (plus:DI (match_dup 4) - (match_dup 2))) - (parallel [(set (pc) - (match_dup 3)) - (use (label_ref (match_operand 1)))])] - "TARGET_64BIT && rs6000_speculate_indirect_jumps" -{ - operands[2] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1])); - operands[3] = gen_reg_rtx (DImode); - operands[4] = gen_reg_rtx (DImode); -}) + rtx lab = force_reg (Pmode, gen_rtx_LABEL_REF (Pmode, operands[1])); + rtx addr = gen_reg_rtx (Pmode); -(define_expand "tablejumpdi_nospec" - [(set (match_dup 5) - (sign_extend:DI (match_operand:SI 0 "lwa_operand"))) - (set (match_dup 4) - (plus:DI (match_dup 5) - (match_dup 3))) - (parallel [(set (pc) - (match_dup 4)) - (use (label_ref (match_operand 1))) - (clobber (match_operand 2))])] - "TARGET_64BIT && !rs6000_speculate_indirect_jumps" -{ - operands[3] = force_reg (DImode, gen_rtx_LABEL_REF (DImode, operands[1])); - operands[4] = gen_reg_rtx (DImode); - operands[5] = gen_reg_rtx (DImode); + emit_insn (gen_add<mode>3 (addr, off, lab)); + emit_jump_insn (gen_tablejump_insn_nospec (Pmode, addr, operands[1], + operands[2])); + DONE; }) -(define_insn "*tablejump<mode>_internal1" +(define_insn "@tablejump<mode>_insn_normal" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (use (label_ref (match_operand 1)))] @@ -12786,7 +12769,7 @@ "b%T0" [(set_attr "type" "jmpreg")]) -(define_insn "*tablejump<mode>_internal1_nospec" +(define_insn "@tablejump<mode>_insn_nospec" [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) (use (label_ref (match_operand 1))) diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index d78ddba..4c0fc86 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -42,6 +42,36 @@ #include <altivec.h> #include <tmmintrin.h> +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi8 (__m128i const __A, int const __D, int const __N) +{ + __v16qi result = (__v16qi)__A; + + result [__N & 0xf] = __D; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi32 (__m128i const __A, int const __D, int const __N) +{ + __v4si result = (__v4si)__A; + + result [__N & 3] = __D; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi64 (__m128i const __A, long long const __D, int const __N) +{ + __v2di result = (__v2di)__A; + + result [__N & 1] = __D; + + return (__m128i) result; +} + extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_epi8 (__m128i __X, const int __N) { diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 4ff5245..d6347db 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -352,6 +352,8 @@ UNSPEC_VSX_FIRST_MISMATCH_EOS_INDEX UNSPEC_XXGENPCV UNSPEC_MTVSBM 
+ UNSPEC_EXTENDDITI2 + UNSPEC_MTVSRD_DITI_W1 UNSPEC_VCNTMB UNSPEC_VEXPAND UNSPEC_VEXTRACT @@ -1253,6 +1255,24 @@ } }) +;; Load rightmost element from load_data +;; using lxvrbx, lxvrhx, lxvrwx, lxvrdx. +(define_insn "vsx_lxvr<wd>x" + [(set (match_operand:TI 0 "vsx_register_operand" "=wa") + (zero_extend:TI (match_operand:INT_ISA3 1 "memory_operand" "Z")))] + "TARGET_POWER10" + "lxvr<wd>x %x0,%y1" + [(set_attr "type" "vecload")]) + +;; Store rightmost element into store_data +;; using stxvrbx, stxvrhx, strvxwx, strvxdx. +(define_insn "vsx_stxvr<wd>x" + [(set (match_operand:INT_ISA3 0 "memory_operand" "=Z") + (truncate:INT_ISA3 (match_operand:TI 1 "vsx_register_operand" "wa")))] + "TARGET_POWER10" + "stxvr<wd>x %x1,%y0" + [(set_attr "type" "vecstore")]) + ;; Explicit load/store expanders for the builtin functions for lxvd2x, etc., ;; when you really want their element-reversing behavior. (define_insn "vsx_ld_elemrev_v2di" @@ -4795,6 +4815,37 @@ "vextsw2d %0,%1" [(set_attr "type" "vecexts")]) +;; ISA 3.1 vector sign extend +;; Move DI value from GPR to TI mode in VSX register, word 1. +(define_insn "mtvsrdd_diti_w1" + [(set (match_operand:TI 0 "register_operand" "=wa") + (unspec:TI [(match_operand:DI 1 "register_operand" "r")] + UNSPEC_MTVSRD_DITI_W1))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mtvsrdd %x0,0,%1" + [(set_attr "type" "vecmove")]) + +;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg +(define_insn "extendditi2_vector" + [(set (match_operand:TI 0 "gpc_reg_operand" "=v") + (unspec:TI [(match_operand:TI 1 "gpc_reg_operand" "v")] + UNSPEC_EXTENDDITI2))] + "TARGET_POWER10" + "vextsd2q %0,%1" + [(set_attr "type" "vecexts")]) + +(define_expand "extendditi2" + [(set (match_operand:TI 0 "gpc_reg_operand") + (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))] + "TARGET_POWER10" + { + /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. 
*/ + rtx temp = gen_reg_rtx (TImode); + emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1])); + emit_insn (gen_extendditi2_vector (operands[0], temp)); + DONE; + }) + ;; ISA 3.0 Binary Floating-Point Support @@ -5659,7 +5710,7 @@ { int i; int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; - int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; + int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7}; rtx rvals[16]; rtx mask = gen_reg_rtx (V16QImode); @@ -5693,7 +5744,7 @@ "TARGET_P9_VECTOR" { int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0}; - int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0}; + int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15}; int i; rtx rvals[16]; @@ -6035,7 +6086,7 @@ (match_operand:QI 2 "const_0_to_1_operand" "n")] UNSPEC_VCNTMB))] "TARGET_POWER10" - "vcntmb<VSX_MM_SUFFIX> %0,%1,%2" + "vcntmb<wd> %0,%1,%2" [(set_attr "type" "vecsimple")]) (define_insn "vec_extract_<mode>" @@ -6043,7 +6094,7 @@ (unspec:SI [(match_operand:VSX_MM 1 "altivec_register_operand" "v")] UNSPEC_VEXTRACT))] "TARGET_POWER10" - "vextract<VSX_MM_SUFFIX>m %0,%1" + "vextract<wd>m %0,%1" [(set_attr "type" "vecsimple")]) (define_insn "vec_expand_<mode>" @@ -6051,5 +6102,5 @@ (unspec:VSX_MM [(match_operand:VSX_MM 1 "vsx_register_operand" "v")] UNSPEC_VEXPAND))] "TARGET_POWER10" - "vexpand<VSX_MM_SUFFIX>m %0,%1" + "vexpand<wd>m %0,%1" [(set_attr "type" "vecsimple")]) diff --git a/gcc/config/rs6000/vxworks.h b/gcc/config/rs6000/vxworks.h index 771dddf..87ca3af 100644 --- a/gcc/config/rs6000/vxworks.h +++ b/gcc/config/rs6000/vxworks.h @@ -18,10 +18,21 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* Note to future editors: VxWorks is mostly an EABI target. We do - not use rs6000/eabi.h because we would have to override most of - it anyway. However, if you change that file, consider making - analogous changes here too. */ +/* The port comes in two very different flavors at this stage: + + - For 653 (AE) and regular versions prior to VxWorks 7, the port + comes with its own set of definitions, matching a system compiler + configured this way as well as the corresponding run-time + environment. This is essentially an eabi system, so changes to + eabi.h should usually be reflected here. + + - Starting with VxWorks 7 (post SR600), the system environment + was made extremely similar to GNU/Linux and this toolchain is + built on top of the corresponding header files. */ + +/*-------------------------------------------------------------*/ +/* Common definitions first. */ +/*-------------------------------------------------------------*/ /* CPP predefined macros. */ @@ -29,111 +40,156 @@ along with GCC; see the file COPYING3. If not see #define TARGET_OS_CPP_BUILTINS() \ do \ { \ - builtin_define ("__ppc"); \ - builtin_define ("__PPC__"); \ - builtin_define ("__EABI__"); \ builtin_define ("__ELF__"); \ + if (!TARGET_VXWORKS7) \ + builtin_define ("__EABI__"); \ + \ + /* CPU macros, based on what the system compilers do. */ \ + if (!TARGET_VXWORKS7) \ + { \ + builtin_define ("__ppc"); \ + /* Namespace violation below, but the system headers \ + really depend heavily on this. */ \ + builtin_define ("CPU_FAMILY=PPC"); \ + \ + /* __PPC__ isn't actually emitted by the system compiler \ + prior to vx7 but has been advertised by us for ages. 
*/ \ + builtin_define ("__PPC__"); \ + } \ + else \ + { \ + builtin_define ("__PPC__"); \ + builtin_define ("__powerpc__"); \ + if (TARGET_64BIT) \ + { \ + builtin_define ("__PPC64__"); \ + builtin_define ("__powerpc64__"); \ + } \ + else \ + { \ + builtin_define ("__PPC"); \ + builtin_define ("__powerpc"); \ + } \ + } \ + \ + /* Asserts for #cpu and #machine. */ \ + if (TARGET_64BIT) \ + { \ + builtin_assert ("cpu=powerpc64"); \ + builtin_assert ("machine=powerpc64"); \ + } \ + else \ + { \ + builtin_assert ("cpu=powerpc"); \ + builtin_assert ("machine=powerpc"); \ + } \ + \ + /* PowerPC VxWorks specificities. */ \ if (!TARGET_SOFT_FLOAT) \ - builtin_define ("__hardfp"); \ + { \ + builtin_define ("__hardfp"); \ + builtin_define ("_WRS_HARDWARE_FP"); \ + } \ \ - /* C89 namespace violation! */ \ - builtin_define ("CPU_FAMILY=PPC"); \ - \ + /* Common VxWorks and port items. */ \ VXWORKS_OS_CPP_BUILTINS (); \ + TARGET_OS_SYSV_CPP_BUILTINS (); \ } \ while (0) -/* vx6 library path. */ -#if !TARGET_VXWORKS7 -#undef STARTFILE_PREFIX_SPEC -#define STARTFILE_PREFIX_SPEC \ - "%{mrtp:%{!shared:%:getenv(WIND_BASE /target/lib/usr/lib/ppc/PPC32/common)}}" +/* Specific CPU macro definitions expected by the system headers, + inferred from -mcpu requests by the user. Different versions of + VxWorks expect different forms of macros, such as + + -D_VX_CPU=_VX_PPC403 on Vx7 and some variants of Vx6, + -DCPU=PPC403 on all Vx6 and earlier. */ + +#if TARGET_VXWORKS7 +#define VX_CPU_PREFIX "_VX_" +#else +#define VX_CPU_PREFIX "" #endif -/* Only big endian PPC is supported by VxWorks. */ -#undef BYTES_BIG_ENDIAN -#define BYTES_BIG_ENDIAN 1 -#undef WORDS_BIG_ENDIAN -#define WORDS_BIG_ENDIAN 1 +#define VX_CPUDEF(CPUID) \ + ":-D" VX_CPU_PREFIX "CPU=" VX_CPU_PREFIX #CPUID -/* We have to kill off the entire specs set created by rs6000/sysv4.h - and substitute our own set. The top level vxworks.h has done some - of this for us. */ +#define VX_MCPU(CPU,CPUID) \ + "mcpu=" #CPU VX_CPUDEF(CPUID) -#undef SUBTARGET_EXTRA_SPECS #undef CPP_SPEC -#undef CC1_SPEC -#undef ASM_SPEC - -#define SUBTARGET_EXTRA_SPECS /* none needed */ +#define CPP_SPEC \ + "%{!D" VX_CPU_PREFIX "CPU=*:%{" \ + VX_MCPU(403, PPC403) ";" \ + VX_MCPU(405, PPC405) ";" \ + VX_MCPU(440, PPC440) ";" \ + VX_MCPU(464, PPC464) ";" \ + VX_MCPU(476, PPC476) ";" \ + VX_MCPU(603, PPC603) ";" \ + VX_MCPU(604, PPC604) ";" \ + VX_MCPU(860, PPC860) ";" \ + VX_MCPU(e6500, E6500) ";" \ + VX_MCPU(8540, PPC85XX) ";" \ + VX_MCPU(8548, PPC85XX) ";" \ + VX_CPUDEF(PPC604) \ + "}}" \ + VXWORKS_ADDITIONAL_CPP_SPEC /* FIXME: The only reason we allow no -mcpu switch at all is because - config-ml.in insists on a "." multilib. */ -#define CPP_SPEC \ -"%{!DCPU=*: \ - %{mcpu=403 : -DCPU=PPC403 ; \ - mcpu=405 : -DCPU=PPC405 ; \ - mcpu=440 : -DCPU=PPC440 ; \ - mcpu=464 : -DCPU=PPC464 ; \ - mcpu=476 : -DCPU=PPC476 ; \ - mcpu=603 : -DCPU=PPC603 ; \ - mcpu=604 : -DCPU=PPC604 ; \ - mcpu=860 : -DCPU=PPC860 ; \ - mcpu=8540: -DCPU=PPC85XX ; \ - mcpu=8548: -DCPU=PPC85XX ; \ - : -DCPU=PPC604 }}" \ -VXWORKS_ADDITIONAL_CPP_SPEC - -#define CC1_SPEC \ -"%{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \ - %{mlittle|mlittle-endian:-mstrict-align}" - -#define ASM_SPEC \ -"%(asm_cpu) \ - %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \ - %{mrelocatable} %{mrelocatable-lib} %{" FPIC_SPEC ":-K PIC} -mbig" + config-ml.in insists on a "." multilib. */ #undef LIB_SPEC #define LIB_SPEC VXWORKS_LIB_SPEC -/* For RTPs, leverage linker relaxation. 
This helps programs referring - to, typically, kernel services too far away for short calls. This is more - precise than -mlongcall and can be overriden with -Wl,--no-relax. */ -#define VXWORKS_RELAX_LINK_SPEC "%{mrtp:--relax}" - -#undef LINK_SPEC -#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_RELAX_LINK_SPEC - #undef STARTFILE_SPEC #define STARTFILE_SPEC VXWORKS_STARTFILE_SPEC + #undef ENDFILE_SPEC #define ENDFILE_SPEC VXWORKS_ENDFILE_SPEC /* There is no default multilib. */ #undef MULTILIB_DEFAULTS -#undef TARGET_DEFAULT -#define TARGET_DEFAULT (MASK_EABI | MASK_STRICT_ALIGN) +/* No _mcount profiling on VxWorks. */ +#undef FUNCTION_PROFILER +#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO) -#undef PROCESSOR_DEFAULT -#define PROCESSOR_DEFAULT PROCESSOR_PPC604 +/* Initialize library function table. */ +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS rs6000_vxworks_init_libfuncs /* Nor sdata, for kernel mode. We use this in SUBSUBTARGET_INITIALIZE_OPTIONS, after rs6000_rtp has been initialized. */ #undef SDATA_DEFAULT_SIZE #define SDATA_DEFAULT_SIZE (TARGET_VXWORKS_RTP ? 8 : 0) -/* Enforce 16-byte alignment for the stack pointer, to permit general - compliance with e.g. Altivec instructions requirements. Make sure - this isn't overruled by the EABI constraints. */ +#undef SUB3TARGET_OVERRIDE_OPTIONS +#define SUB3TARGET_OVERRIDE_OPTIONS \ + do { \ + if (!global_options_set.x_g_switch_value) \ + g_switch_value = SDATA_DEFAULT_SIZE; \ + VXWORKS_OVERRIDE_OPTIONS; \ + } while (0) -#undef STACK_BOUNDARY -#define STACK_BOUNDARY (16*BITS_PER_UNIT) +/* The stack pointer need not be moved while checking the stack. */ +#undef STACK_CHECK_MOVING_SP -#undef PREFERRED_STACK_BOUNDARY -#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY +/* Define this to be nonzero if static stack checking is supported. */ +#define STACK_CHECK_STATIC_BUILTIN 1 -#undef ABI_STACK_BOUNDARY +/* Room needed to allow exception propagation, from what experiments + and low level observations taught us ... */ +#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024) + +/* Leverage linker relaxation for RTPs. This helps 32bit programs + referring to kernel services too far away for short calls, is more + precise than -mlongcall and can be overriden with -Wl,--no-relax. */ +#define VXWORKS_RELAX_LINK_SPEC "%{mrtp:--relax}" + +/*-------------------------------------------------------------*/ +/* Pre-VxWorks7 configuration. */ +/*-------------------------------------------------------------*/ + +#if !TARGET_VXWORKS7 #undef RS6000_STARTING_FRAME_OFFSET #define RS6000_STARTING_FRAME_OFFSET \ @@ -146,21 +202,79 @@ VXWORKS_ADDITIONAL_CPP_SPEC RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \ + STACK_POINTER_OFFSET, 16) -#undef SUBSUBTARGET_OVERRIDE_OPTIONS -#define SUBSUBTARGET_OVERRIDE_OPTIONS \ - do { \ - if (!global_options_set.x_g_switch_value) \ - g_switch_value = SDATA_DEFAULT_SIZE; \ - VXWORKS_OVERRIDE_OPTIONS; \ - } while (0) +/* Enforce 16-byte alignment for the stack pointer, to permit general + compliance with e.g. Altivec instructions requirements. Make sure + this isn't overruled by the EABI constraints. */ -/* No _mcount profiling on VxWorks. */ -#undef FUNCTION_PROFILER -#define FUNCTION_PROFILER(FILE,LABELNO) VXWORKS_FUNCTION_PROFILER(FILE,LABELNO) +#undef STACK_BOUNDARY +#define STACK_BOUNDARY (16*BITS_PER_UNIT) -/* Define this to be nonzero if static stack checking is supported. 
*/ -#define STACK_CHECK_STATIC_BUILTIN 1 +#undef PREFERRED_STACK_BOUNDARY +#define PREFERRED_STACK_BOUNDARY STACK_BOUNDARY + +#undef ABI_STACK_BOUNDARY + +#undef STARTFILE_PREFIX_SPEC +#define STARTFILE_PREFIX_SPEC \ + "%{mrtp:%{!shared:%:getenv(WIND_BASE /target/lib/usr/lib/ppc/PPC32/common)}}" + +/* For aggregates passing, use the same, consistent ABI as Linux. */ +#define AGGREGATE_PADDING_FIXED 0 +#define AGGREGATES_PAD_UPWARD_ALWAYS 0 + +#undef ASM_SPEC +#define ASM_SPEC \ +"%(asm_cpu) \ + %{,assembler|,assembler-with-cpp: %{mregnames} %{mno-regnames}} \ + %{mrelocatable} %{mrelocatable-lib} %{" FPIC_SPEC ":-K PIC} -mbig" + +#undef CC1_SPEC +#define CC1_SPEC VXWORKS_CC1_SPEC " \ + %{G*} %{mno-sdata:-msdata=none} %{msdata:-msdata=default} \ + %{mlittle|mlittle-endian:-mstrict-align}" + +#undef LINK_SPEC +#define LINK_SPEC VXWORKS_LINK_SPEC " " VXWORKS_RELAX_LINK_SPEC + +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_EABI | MASK_STRICT_ALIGN) + +#undef PROCESSOR_DEFAULT +#define PROCESSOR_DEFAULT PROCESSOR_PPC604 + +/* Only big endian PPC is supported by VxWorks. */ +#undef BYTES_BIG_ENDIAN +#define BYTES_BIG_ENDIAN 1 + +#undef WORDS_BIG_ENDIAN +#define WORDS_BIG_ENDIAN 1 + +#undef SUBTARGET_EXTRA_SPECS +#define SUBTARGET_EXTRA_SPECS /* none needed */ + +#else /* TARGET_VXWORKS7 */ + +/*-------------------------------------------------------------*/ +/* Post-VxWorks7 (SR600) configuration. */ +/*-------------------------------------------------------------*/ + +/* VxWorks does not use local symbols for the function entry point. */ +#undef DOT_SYMBOLS +#define DOT_SYMBOLS 0 + +#undef LINK_OS_VXWORKS_SPEC +#define LINK_OS_VXWORKS_SPEC \ + " %{!mrtp:-r} %{mrtp:-q -static} %{!Xbind-lazy:-z now}" + +#undef LINK_OS_EXTRA_SPEC32 +#define LINK_OS_EXTRA_SPEC32 LINK_OS_VXWORKS_SPEC " " VXWORKS_RELAX_LINK_SPEC + +#undef LINK_OS_EXTRA_SPEC64 +#define LINK_OS_EXTRA_SPEC64 LINK_OS_VXWORKS_SPEC + +/* linux64.h enables this, not supported in vxWorks. */ +#undef TARGET_FLOAT128_ENABLE_TYPE +#define TARGET_FLOAT128_ENABLE_TYPE 0 + +#endif /* TARGET_VXWORKS7 */ -/* This platform supports the probing method of stack checking (RTP mode). - 8K is reserved in the stack to propagate exceptions in case of overflow. */ -#define STACK_CHECK_PROTECT 8192 diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index 6f1bc07..029f728 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool); extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code); extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); extern void s390_expand_vec_init (rtx, rtx); +extern rtx s390_build_signbit_mask (machine_mode); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx); diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c index c762840..f9b27f9 100644 --- a/gcc/config/s390/s390.c +++ b/gcc/config/s390/s390.c @@ -2467,6 +2467,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end) rtx elt; bool b; + /* Handle floats by bitcasting them to ints. 
*/ + op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op); + gcc_assert (!!start == !!end); if (!const_vec_duplicate_p (op, &elt) || !CONST_INT_P (elt)) @@ -5952,6 +5955,7 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) rtx temp; rtx len = gen_reg_rtx (QImode); rtx cond; + rtx mem; s390_load_address (str_addr_base_reg, XEXP (string, 0)); emit_move_insn (str_idx_reg, const0_rtx); @@ -5993,10 +5997,10 @@ s390_expand_vec_strlen (rtx target, rtx string, rtx alignment) LABEL_NUSES (loop_start_label) = 1; /* Load 16 bytes of the string into VR. */ - emit_move_insn (str_reg, - gen_rtx_MEM (V16QImode, - gen_rtx_PLUS (Pmode, str_idx_reg, - str_addr_base_reg))); + mem = gen_rtx_MEM (V16QImode, + gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg)); + set_mem_align (mem, 128); + emit_move_insn (str_reg, mem); if (into_loop_label != NULL_RTX) { emit_label (into_loop_label); @@ -6863,15 +6867,16 @@ s390_expand_vec_init (rtx target, rtx vals) } /* Use vector gen mask or vector gen byte mask if possible. */ - if (all_same && all_const_int - && (XVECEXP (vals, 0, 0) == const0_rtx - || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), - NULL, NULL) - || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) + if (all_same && all_const_int) { - emit_insn (gen_rtx_SET (target, - gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)))); - return; + rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); + if (XVECEXP (vals, 0, 0) == const0_rtx + || s390_contiguous_bitmask_vector_p (vec, NULL, NULL) + || s390_bytemask_vector_p (vec, NULL)) + { + emit_insn (gen_rtx_SET (target, vec)); + return; + } } /* Use vector replicate instructions. vlrep/vrepi/vrep */ @@ -6949,6 +6954,30 @@ s390_expand_vec_init (rtx target, rtx vals) } } +/* Emit a vector constant that contains 1s in each element's sign bit position + and 0s in other positions. MODE is the desired constant's mode. */ +extern rtx +s390_build_signbit_mask (machine_mode mode) +{ + /* Generate the integral element mask value. */ + machine_mode inner_mode = GET_MODE_INNER (mode); + int inner_bitsize = GET_MODE_BITSIZE (inner_mode); + wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize); + + /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral + value to the desired mode. */ + machine_mode int_mode = related_int_vector_mode (mode).require (); + rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode)); + mask = gen_lowpart (inner_mode, mask); + + /* Emit the vector mask rtx by mode the element mask rtx. */ + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + for (int i = 0; i < nunits; i++) + RTVEC_ELT (v, i) = mask; + return gen_rtx_CONST_VECTOR (mode, v); +} + /* Structure to hold the initial parameters for a compare_and_swap operation in HImode and QImode. 
*/ @@ -16082,12 +16111,13 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) fenv_var = __builtin_s390_efpc (); __builtin_s390_sfpc (fenv_var & mask) */ - tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc); - tree new_fpc = - build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, - build_int_cst (unsigned_type_node, - ~(FPC_DXC_MASK | FPC_FLAGS_MASK | - FPC_EXCEPTION_MASK))); + tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc, + NULL_TREE, NULL_TREE); + tree new_fpc + = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, + build_int_cst (unsigned_type_node, + ~(FPC_DXC_MASK | FPC_FLAGS_MASK + | FPC_EXCEPTION_MASK))); tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc); *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc); @@ -16106,8 +16136,8 @@ s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */ old_fpc = create_tmp_var_raw (unsigned_type_node); - tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node, - old_fpc, call_efpc); + tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc, + NULL_TREE, NULL_TREE); set_new_fpc = build_call_expr (sfpc, 1, fenv_var); diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 4c3e540..18edea1 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1391,23 +1391,55 @@ ; (TF|DF|SF|TD|DD|SD) instructions -; FIXME: load and test instructions turn SNaN into QNaN what is not -; acceptable if the target will be used afterwards. On the other hand -; they are quite convenient for implementing comparisons with 0.0. So -; try to enable them via splitter/peephole if the value isn't needed anymore. -; See testcases: load-and-test-fp-1.c and load-and-test-fp-2.c +; load and test instructions turn a signaling NaN into a quiet NaN. Thus they +; may only be used if the target register is dead afterwards or if fast math +; is enabled. The former is done via a peephole optimization. Note, load and +; test instructions may only be used for (in)equality comparisons because +; relational comparisons must treat a quiet NaN like a signaling NaN which is +; not the case for load and test instructions. For fast math insn +; "cmp<mode>_ccs_0_fastmath" applies. 
+; See testcases load-and-test-fp-{1,2}.c + +(define_peephole2 + [(set (match_operand:FP 0 "register_operand") + (match_operand:FP 1 "const0_operand")) + (set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_operand:FP 2 "register_operand") + (match_operand:FP 3 "register_operand")))] + "TARGET_HARD_FLOAT + && FP_REG_P (operands[2]) + && REGNO (operands[0]) == REGNO (operands[3]) + && peep2_reg_dead_p (2, operands[0]) + && peep2_reg_dead_p (2, operands[2])" + [(parallel + [(set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_dup 2) (match_dup 1))) + (clobber (match_dup 2))])] + "") ; ltxbr, ltdbr, ltebr, ltxtr, ltdtr -(define_insn "*cmp<mode>_ccs_0" - [(set (reg CC_REGNUM) - (compare (match_operand:FP 0 "register_operand" "f") - (match_operand:FP 1 "const0_operand" ""))) - (clobber (match_operand:FP 2 "register_operand" "=0"))] - "s390_match_ccmode(insn, CCSmode) && TARGET_HARD_FLOAT" +(define_insn "*cmp<mode>_ccz_0" + [(set (reg:CCZ CC_REGNUM) + (compare:CCZ (match_operand:FP 0 "register_operand" "f") + (match_operand:FP 1 "const0_operand"))) + (clobber (match_operand:FP 2 "register_operand" "=0"))] + "TARGET_HARD_FLOAT" "lt<xde><bt>r\t%0,%0" [(set_attr "op_type" "RRE") (set_attr "type" "fsimp<mode>")]) +(define_insn "*cmp<mode>_ccs_0_fastmath" + [(set (reg CC_REGNUM) + (compare (match_operand:FP 0 "register_operand" "f") + (match_operand:FP 1 "const0_operand")))] + "s390_match_ccmode (insn, CCSmode) + && TARGET_HARD_FLOAT + && !flag_trapping_math + && !flag_signaling_nans" + "lt<xde><bt>r\t%0,%0" + [(set_attr "op_type" "RRE") + (set_attr "type" "fsimp<mode>")]) + ; VX: TFmode in FPR pairs: use cxbr instead of wfcxb ; cxtr, cdtr, cxbr, cdbr, cebr, cdb, ceb, wfcsb, wfcdb (define_insn "*cmp<mode>_ccs" diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 2573b7d..3c01cd1 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -1425,35 +1425,45 @@ ; Vector copysign, implement using vector select (define_expand "copysign<mode>3" - [(set (match_operand:VFT 0 "register_operand" "") - (if_then_else:VFT - (eq (match_dup 3) - (match_dup 4)) - (match_operand:VFT 1 "register_operand" "") - (match_operand:VFT 2 "register_operand" "")))] + [(set (match_operand:VFT 0 "register_operand" "") + (ior:VFT + (and:VFT (match_operand:VFT 2 "register_operand" "") + (match_dup 3)) + (and:VFT (not:VFT (match_dup 3)) + (match_operand:VFT 1 "register_operand" ""))))] "TARGET_VX" { - int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)); - int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode)); - wide_int mask_val = wi::shwi (1l << (sz - 1), prec); - - rtx mask = gen_reg_rtx (<tointvec>mode); - - int nunits = GET_MODE_NUNITS (<tointvec>mode); - rtvec v = rtvec_alloc (nunits); - for (int i = 0; i < nunits; i++) - RTVEC_ELT (v, i) = GEN_INT (mask_val.to_shwi ()); - - mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v); - operands[3] = force_reg (<tointvec>mode, mask); - operands[4] = CONST0_RTX (<tointvec>mode); + rtx mask = s390_build_signbit_mask (<MODE>mode); + operands[3] = force_reg (<MODE>mode, mask); }) ;; ;; Integer compares ;; -(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode>_nocc" +(define_expand "vec_cmp<VI_HW:mode><VI_HW:mode>" + [(set (match_operand:VI_HW 0 "register_operand" "") + (match_operator:VI_HW 1 "" + [(match_operand:VI_HW 2 "register_operand" "") + (match_operand:VI_HW 3 "register_operand" "")]))] + "TARGET_VX" +{ + s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]); + DONE; +}) + +(define_expand 
"vec_cmpu<VI_HW:mode><VI_HW:mode>" + [(set (match_operand:VI_HW 0 "register_operand" "") + (match_operator:VI_HW 1 "" + [(match_operand:VI_HW 2 "register_operand" "") + (match_operand:VI_HW 3 "register_operand" "")]))] + "TARGET_VX" +{ + s390_expand_vec_compare (operands[0], GET_CODE(operands[1]), operands[2], operands[3]); + DONE; +}) + +(define_insn "*vec_cmp<VICMP_HW_OP:code><VI:mode><VI:mode>_nocc" [(set (match_operand:VI 2 "register_operand" "=v") (VICMP_HW_OP:VI (match_operand:VI 0 "register_operand" "v") (match_operand:VI 1 "register_operand" "v")))] diff --git a/gcc/config/t-vxworks b/gcc/config/t-vxworks index fd1fbfd..221f53c 100644 --- a/gcc/config/t-vxworks +++ b/gcc/config/t-vxworks @@ -59,7 +59,7 @@ stmp-int-hdrs: subst-glimits.h subst-%.h: cp -p $(srcdir)/$*.h orig-$*.h ID=$$(echo $(BASEVER_c) | sed -e 's/\./_/g'); \ - sed -e "s/_LIMITS_H__/_LIMITS_H_$${ID}_/" < $(srcdir)/$*.h > $@ + sed -e "s/_LIMITS_H__/_LIMITS_H__$${ID}_/" < $(srcdir)/$*.h > $@ cp $@ $(srcdir)/$*.h # Then arrange to restore the original versions after the standard diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h index f4a1ffd..9cd7b3d 100644 --- a/gcc/config/vx-common.h +++ b/gcc/config/vx-common.h @@ -23,8 +23,6 @@ along with GCC; see the file COPYING3. If not see /* Most of these will probably be overridden by subsequent headers. We undefine them here just in case, and define VXWORKS_ versions of each, to be used in port-specific vxworks.h. */ -#undef LIB_SPEC -#undef LINK_SPEC #undef LIBGCC_SPEC #define LIBGCC_SPEC VXWORKS_LIBGCC_SPEC #undef STARTFILE_SPEC diff --git a/gcc/config/vxworks.c b/gcc/config/vxworks.c index 970d504..ca0f5de 100644 --- a/gcc/config/vxworks.c +++ b/gcc/config/vxworks.c @@ -154,8 +154,10 @@ vxworks_override_options (void) targetm.have_ctors_dtors = TARGET_VXWORKS_HAVE_CTORS_DTORS || HAVE_INITFINI_ARRAY_SUPPORT; - /* PIC is only supported for RTPs. */ - if (flag_pic && !TARGET_VXWORKS_RTP) + /* PIC is only supported for RTPs. flags_pic might be < 0 here, in + contexts where the corresponding switches are not processed, + e.g. from --help. We are not generating code in such cases. */ + if (flag_pic > 0 && !TARGET_VXWORKS_RTP) error ("PIC is only supported for RTPs"); /* VxWorks comes with non-gdb debuggers which only support strict diff --git a/gcc/config/vxworks.h b/gcc/config/vxworks.h index e50260b0..b7e5970 100644 --- a/gcc/config/vxworks.h +++ b/gcc/config/vxworks.h @@ -70,6 +70,12 @@ along with GCC; see the file COPYING3. If not see #endif +/* Our ports rely on gnu-user.h, which #defines _POSIX_SOURCE for + C++ by default. VxWorks doesn't provide 100% of what this implies + (e.g. ::mkstemp), so, arrange to prevent that by falling back to + the default CPP spec for C++ as well. */ +#undef CPLUSPLUS_CPP_SPEC + /* For VxWorks static rtps, the system provides libc_internal.a, a superset of libgcc.a that we need to use e.g. to satisfy references to __init and __fini. We still want our libgcc to prevail for symbols it would provide @@ -84,7 +90,7 @@ along with GCC; see the file COPYING3. If not see #define VXWORKS_SYSCALL_LIBS_RTP #if TARGET_VXWORKS7 -#define VXWORKS_NET_LIBS_RTP "-lnet" +#define VXWORKS_NET_LIBS_RTP "-l%:if-exists-then-else(%:getenv(VSB_DIR /usr/h/public/rtnetStackLib.h) rtnet net)" #else #define VXWORKS_NET_LIBS_RTP "-lnet -ldsi" #endif @@ -152,8 +158,7 @@ along with GCC; see the file COPYING3. If not see /* Setup the crtstuff begin/end we might need for dwarf EH registration. 
*/ #if !defined(CONFIG_SJLJ_EXCEPTIONS) && DWARF2_UNWIND_INFO -#define VX_CRTBEGIN_SPEC \ - "%{!mrtp:vx_crtbegin-kernel.o%s} %{mrtp:vx_crtbegin-rtp.o%s}" +#define VX_CRTBEGIN_SPEC "vx_crtbegin.o%s" #define VX_CRTEND_SPEC "-l:vx_crtend.o" #else #define VX_CRTBEGIN_SPEC "" diff --git a/gcc/config/vxworks/_vxworks-versions.h b/gcc/config/vxworks/_vxworks-versions.h index 0aaf547..15e8bfe 100644 --- a/gcc/config/vxworks/_vxworks-versions.h +++ b/gcc/config/vxworks/_vxworks-versions.h @@ -22,17 +22,29 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #ifndef _VXWORKS_VERSIONS_H #define _VXWORKS_VERSIONS_H 1 -/* All we need is access to the bare _WRS_VXWORKS_MAJOR/MINOR macros - exposed by version.h. Cheat a bit to make sure we don't drag additional - header files, which can easily cause #include ordering nightmares. */ +/* All we need is access to the bare _WRS_VXWORKS_MAJOR/MINOR macros, + exposed by version.h or already provided somehow (e.g. with a self + spec for some reason). When resorting to system headers, cheat a + bit to make sure we don't drag additional header files, which can + easily cause #include ordering nightmares. */ +#if !defined(_WRS_VXWORKS_MAJOR) #pragma push_macro("_WRS_KERNEL") #undef _WRS_KERNEL #include <version.h> #pragma pop_macro("_WRS_KERNEL") +#endif + +/* A lot depends on the MAJOR so we really need to make sure we have + that. MINOR is less critical and many environments don't actually + define it unless it is really meaningful (e.g. 6.4 through 6.9). */ #if !defined(_WRS_VXWORKS_MAJOR) -#error "VxWorks version macros needed but not defined" +#error "_WRS_VXWORKS_MAJOR undefined" +#endif + +#if !defined(_WRS_VXWORKS_MINOR) +#define _WRS_VXWORKS_MINOR 0 #endif #define _VXWORKS_MAJOR_GT(MAJOR) (_WRS_VXWORKS_MAJOR > (MAJOR)) |
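As an illustration of the rs6000-c.c hunk above (not part of the patch itself): the compiler now predefines __PCREL__ whenever pc-relative code generation (OPTION_MASK_PCREL) is in effect, mirroring the existing __MMA__ handling. A minimal, hedged sketch of how source code might key off the new macro; the function name is invented for the example.

/* Report which addressing model this translation unit was built for.
   __PCREL__ is the predefine added in rs6000-c.c above.  */
const char *
example_addressing_model (void)
{
#ifdef __PCREL__
  return "pc-relative";   /* e.g. -mcpu=power10 with pc-relative code enabled.  */
#else
  return "TOC-based";
#endif
}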
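Likewise for the new ISA 3.1 "load rightmost element and sign/zero extend" and "truncate and store rightmost element" built-ins registered in the rs6000-call.c hunks above, a minimal usage sketch (not part of the patch). The vec_xl_sext, vec_xl_zext and vec_xst_trunc spellings follow the prototypes quoted in those hunks and are assumed to be the altivec.h names resolving to __builtin_vec_se_lxvrx, __builtin_vec_ze_lxvrx and __builtin_vec_tr_stxvrx; a 64-bit Power10 target (e.g. -mcpu=power10) is assumed.

#include <altivec.h>

/* lxvrbx: load the byte at *P and sign-extend it to 128 bits.  */
vector signed __int128
load_sext_byte (signed char *p)
{
  return vec_xl_sext (0, p);
}

/* lxvrwx: load the 32-bit word at *P and zero-extend it to 128 bits.  */
vector unsigned __int128
load_zext_word (unsigned int *p)
{
  return vec_xl_zext (0, p);
}

/* stxvrhx: store only the low 16 bits of V to *Q.  */
void
store_trunc_half (vector unsigned __int128 v, unsigned short *q)
{
  vec_xst_trunc (v, 0, q);
}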
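Finally, the smmintrin.h hunk above adds PowerPC implementations of the SSE4.1 element-insert intrinsics. A short usage sketch matching the signatures defined there (again not part of the patch); the compile flags normally needed for the rs6000 x86-compatibility headers (a VSX-capable -mcpu and -DNO_WARN_X86_INTRINSICS) are assumptions, not something this change touches.

#include <smmintrin.h>

/* Overwrite byte 3, 32-bit element 1 and 64-bit element 0 of V,
   using the newly added _mm_insert_epi{8,32,64}.  */
__m128i
patch_lanes (__m128i v)
{
  v = _mm_insert_epi8 (v, 0x7f, 3);
  v = _mm_insert_epi32 (v, 12345, 1);
  v = _mm_insert_epi64 (v, 0x0123456789abcdefLL, 0);
  return v;
}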