54 files changed, 1272 insertions, 134 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 264bbd2..5b54c5a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,74 @@
+2025-04-21  Jan Hubicka  <hubicka@ucw.cz>
+
+	PR target/119879
+	* config/i386/i386.cc (fp_conversion_stmt_cost): Inline to ...
+	(ix86_vector_costs::add_stmt_cost): ... here; fix handling of NOP_EXPR.
+
+2025-04-21  Matthew Fortune  <matthew.fortune@imgtec.com>
+
+	* config/mips/mips.cc (mips_option_override): Error out for
+	-mmicromips -mmsa.
+
+2025-04-21  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR middle-end/119507
+	* except.cc (switch_to_exception_section): Don't use the cached section if
+	the current function is in comdat.
+
+2025-04-21  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	* vec.h (array_slice::begin): Assert that the
+	slice is valid.
+	(array_slice::end): Likewise.
+
+2025-04-21  hongtao.liu  <hongtao.liu@intel.com>
+
+	* config/i386/i386-expand.cc (ix86_emit_swdivsf): Generate 2
+	FMA instructions when TARGET_FMA.
+
+2025-04-19  Jeff Law  <jlaw@ventanamicro.com>
+
+	PR target/119865
+	* config/riscv/riscv.cc (parse_features_for_version): Do not
+	explicitly free the architecture string.
+
+2025-04-19  Jeff Law  <jlaw@ventanamicro.com>
+
+	PR target/118410
+	* config/riscv/bitmanip.md (logical with constant argument): New
+	splitter for cases where synthesizing ~C is cheaper than synthesizing
+	the original constant C.
+
+2025-04-19  Jan Hubicka  <hubicka@ucw.cz>
+
+	* config/i386/i386.cc (vec_fp_conversion_cost): New function.
+	(ix86_rtx_costs): Use it for SSE/AVX FP conversions.
+	(ix86_builtin_vectorization_cost): Fix indentation;
+	and use vec_fp_conversion_cost in vec_promote_demote.
+	(fp_conversion_stmt_cost): New function.
+	(ix86_vector_costs::add_stmt_cost): Use it to cost NOP_EXPR
+	and vec_promote_demote.
+	* config/i386/i386.h (struct processor_costs): Add FP conversion costs.
+	* config/i386/x86-tune-costs.h (struct processor_costs): Set them.
+
+2025-04-19  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR rtl-optimization/111949
+	* combine.cc (find_split_point): Add a split point
+	for `(and (not X) Y)` if not in the outer set already.
+
+2025-04-19  Jiaxun Yang  <jiaxun.yang@flygoat.com>
+
+	PR target/111814
+	* config/sh/sh-modes.def (RESET_FLOAT_FORMAT): Use mips format.
+	(FLOAT_MODE): Use mips mode.
+
+2025-04-19  Maciej W. Rozycki  <macro@orcam.me.uk>
+
+	* config/alpha/alpha.cc
+	(alpha_get_mem_rtx_alignment_and_offset): Recurse into
+	COMPONENT_REF nodes.
+
 2025-04-18  Jeff Law  <jlaw@ventanamicro.com>
 
 	* config/riscv/bitmanip.md (*bext<mode>_mask_pos): New pattern
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f0d1b43..fa0255d 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250419
+20250422
diff --git a/gcc/combine.cc b/gcc/combine.cc
index e118608..873c2bd 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -5280,6 +5280,12 @@ find_split_point (rtx *loc, rtx_insn *insn, bool set_src)
 	  SUBST (XEXP (x, 0), XEXP (x, 1));
 	  SUBST (XEXP (x, 1), tem);
 	}
+      /* Many targets have `(and (not X) Y)` and/or `(ior (not X) Y)` instructions.
+	 Split at those insns.  However, if this is
+	 the SET_SRC, we likely do not have such an instruction and it's
+	 worthless to try this split.  */
+      if (!set_src && GET_CODE (XEXP (x, 0)) == NOT)
+	return loc;
       break;
 
     case PLUS:
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index ba470d9..14e7da5 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -4291,14 +4291,10 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
 
   tree mem = MEM_EXPR (expr);
   if (mem != NULL_TREE)
-    switch (TREE_CODE (mem))
-      {
-      case MEM_REF:
-	tree_offset = mem_ref_offset (mem).force_shwi ();
-	tree_align = get_object_alignment (get_base_address (mem));
-	break;
+    {
+      HOST_WIDE_INT comp_offset = 0;
 
-      case COMPONENT_REF:
+      for (; TREE_CODE (mem) == COMPONENT_REF; mem = TREE_OPERAND (mem, 0))
 	{
 	  tree byte_offset = component_ref_field_offset (mem);
 	  tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (mem, 1));
@@ -4307,14 +4303,15 @@ alpha_get_mem_rtx_alignment_and_offset (rtx expr, int &a, HOST_WIDE_INT &o)
 	      || !poly_int_tree_p (byte_offset, &offset)
 	      || !tree_fits_shwi_p (bit_offset))
 	    break;
-	  tree_offset = offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
+	  comp_offset += offset + tree_to_shwi (bit_offset) / BITS_PER_UNIT;
 	}
-	tree_align = get_object_alignment (get_base_address (mem));
-	break;
 
-      default:
-	break;
-      }
+      if (TREE_CODE (mem) == MEM_REF)
+	{
+	  tree_offset = comp_offset + mem_ref_offset (mem).force_shwi ();
+	  tree_align = get_object_alignment (get_base_address (mem));
+	}
+    }
 
   if (reg_align > mem_align)
     {
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..36f71eb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
   e1 = gen_reg_rtx (mode);
   x1 = gen_reg_rtx (mode);
 
-  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
   b = force_reg (mode, b);
 
   /* x0 = rcp(b) estimate */
@@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
     emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
 						UNSPEC_RCP)));
 
-  /* e0 = x0 * b */
-  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+  unsigned vector_size = GET_MODE_SIZE (mode);
+
+  /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+     N-R step with 2 fma implementation.  */
+  if (TARGET_FMA
+      || (TARGET_AVX512F && vector_size == 64)
+      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+    {
+      /* e0 = x0 * a  */
+      emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+      /* e1 = e0 * b - a  */
+      emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+					       gen_rtx_NEG (mode, a))));
+      /* res = - e1 * x0 + e0  */
+      emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+					       gen_rtx_NEG (mode, e1),
+					       x0, e0)));
+    }
+  else
+    /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+    {
+      /* e0 = x0 * b */
+      emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
 
-  /* e0 = x0 * e0 */
-  emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+      /* e1 = x0 + x0 */
+      emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
 
-  /* e1 = x0 + x0 */
-  emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+      /* e0 = x0 * e0 */
+      emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
 
-  /* x1 = e1 - e0 */
-  emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+      /* x1 = e1 - e0 */
+      emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
 
-  /* res = a * x1 */
-  emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+      /* res = a * x1 */
+      emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+    }
 }
 
 /* Output code to perform a Newton-Rhapson approximation of a
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 38df84f..d15f91d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -100,6 +100,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "i386-features.h"
 #include "function-abi.h"
 #include "rtl-error.h"
+#include "gimple-pretty-print.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -21816,6 +21817,25 @@ ix86_insn_cost (rtx_insn *insn, bool speed)
   return insn_cost + pattern_cost (PATTERN (insn), speed);
 }
 
+/* Return cost of SSE/AVX FP->FP conversion (extensions and truncates).  */
+
+static int
+vec_fp_conversion_cost (const struct processor_costs *cost, int size)
+{
+  if (size < 128)
+    return cost->cvtss2sd;
+  else if (size < 256)
+    {
+      if (TARGET_SSE_SPLIT_REGS)
+	return cost->cvtss2sd * size / 64;
+      return cost->cvtss2sd;
+    }
+  if (size < 512)
+    return cost->vcvtps2pd256;
+  else
+    return cost->vcvtps2pd512;
+}
+
 /* Compute a (partial) cost for rtx X.  Return true if the complete
    cost has been computed, and false if subexpressions should be
    scanned.  In either case, *TOTAL contains the cost result.  */
@@ -22479,17 +22499,18 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
       return false;
 
     case FLOAT_EXTEND:
+      /* x87 represents all values extended to 80bit.  */
       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
 	*total = 0;
       else
-        *total = ix86_vec_cost (mode, cost->addss);
+	*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
       return false;
 
     case FLOAT_TRUNCATE:
       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
 	*total = cost->fadd;
       else
-        *total = ix86_vec_cost (mode, cost->addss);
+	*total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
       return false;
 
     case ABS:
@@ -24683,7 +24704,7 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   switch (type_of_cost)
     {
       case scalar_stmt:
-        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
+	return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
 
       case scalar_load:
 	/* load/store costs are relative to register move which is 2. Recompute
@@ -24754,7 +24775,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
         return ix86_cost->cond_not_taken_branch_cost;
 
       case vec_perm:
+	return ix86_vec_cost (mode, ix86_cost->sse_op);
+
       case vec_promote_demote:
+	if (fp)
+	  return vec_fp_conversion_cost (ix86_tune_cost, mode);
         return ix86_vec_cost (mode, ix86_cost->sse_op);
 
       case vec_construct:
@@ -25342,6 +25367,9 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	        (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
 		 TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
 	    stmt_cost = 0;
+	  else if (fp)
+	    stmt_cost = vec_fp_conversion_cost
+			  (ix86_tune_cost, GET_MODE_BITSIZE (mode));
 	  break;
 
 	case BIT_IOR_EXPR:
@@ -25383,6 +25411,29 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	break;
       }
 
+  if (kind == vec_promote_demote
+      && fp && FLOAT_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
+    {
+      int outer_size
+	= tree_to_uhwi
+	    (TYPE_SIZE
+		(TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
+      int inner_size
+	= tree_to_uhwi
+	    (TYPE_SIZE
+		(TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
+      int stmt_cost = vec_fp_conversion_cost
+			(ix86_tune_cost, GET_MODE_BITSIZE (mode));
+      /* VEC_PACK_TRUNC_EXPR: If inner size is greater than outer size we will end
+	 up doing two conversions and packing them.  */
+      if (inner_size > outer_size)
+	{
+	  int n = inner_size / outer_size;
+	  stmt_cost = stmt_cost * n
+		      + (n - 1) * ix86_vec_cost (mode, ix86_cost->sse_op);
+	}
+    }
+
   /* If we do elementwise loads into a vector then we are bound by
      latency and execution resources for the many scalar loads
      (AGU and load ports).  Try to account for this by scaling the
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8507243..18aa42d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -207,6 +207,12 @@ struct processor_costs {
   const int divsd;		/* cost of DIVSD instructions.  */
   const int sqrtss;		/* cost of SQRTSS instructions.  */
   const int sqrtsd;		/* cost of SQRTSD instructions.  */
+  const int cvtss2sd;		/* cost SSE FP conversions,
+				   such as CVTSS2SD.  */
+  const int vcvtps2pd256;	/* cost 256bit packed FP conversions,
+				   such as VCVTPD2PS with larger reg in ymm.  */
+  const int vcvtps2pd512;	/* cost 512bit packed FP conversions,
+				   such as VCVTPD2PS with larger reg in zmm.  */
   const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
 				/* Specify reassociation width for integer,
 				   fp, vector integer and vector fp
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 9477345..cddcf61 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -121,16 +121,19 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
   COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
   COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
 
-  COSTS_N_BYTES (2),			/* cost of cheap SSE instruction.  */
-  COSTS_N_BYTES (2),			/* cost of ADDSS/SD SUBSS/SD insns.  */
-  COSTS_N_BYTES (2),			/* cost of MULSS instruction.  */
-  COSTS_N_BYTES (2),			/* cost of MULSD instruction.  */
-  COSTS_N_BYTES (2),			/* cost of FMA SS instruction.  */
-  COSTS_N_BYTES (2),			/* cost of FMA SD instruction.  */
-  COSTS_N_BYTES (2),			/* cost of DIVSS instruction.  */
-  COSTS_N_BYTES (2),			/* cost of DIVSD instruction.  */
-  COSTS_N_BYTES (2),			/* cost of SQRTSS instruction.  */
-  COSTS_N_BYTES (2),			/* cost of SQRTSD instruction.  */
+  COSTS_N_BYTES (4),			/* cost of cheap SSE instruction.  */
+  COSTS_N_BYTES (4),			/* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_BYTES (4),			/* cost of MULSS instruction.  */
+  COSTS_N_BYTES (4),			/* cost of MULSD instruction.  */
+  COSTS_N_BYTES (4),			/* cost of FMA SS instruction.  */
+  COSTS_N_BYTES (4),			/* cost of FMA SD instruction.  */
+  COSTS_N_BYTES (4),			/* cost of DIVSS instruction.  */
+  COSTS_N_BYTES (4),			/* cost of DIVSD instruction.  */
+  COSTS_N_BYTES (4),			/* cost of SQRTSS instruction.  */
+  COSTS_N_BYTES (4),			/* cost of SQRTSD instruction.  */
+  COSTS_N_BYTES (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_BYTES (4),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_BYTES (6),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   ix86_size_memcpy,
   ix86_size_memset,
@@ -243,6 +246,9 @@ struct processor_costs i386_cost = {	/* 386 specific costs */
   COSTS_N_INSNS (88),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (122),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (122),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (27),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (54),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (108),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   i386_memcpy,
   i386_memset,
@@ -356,6 +362,9 @@ struct processor_costs i486_cost = {	/* 486 specific costs */
   COSTS_N_INSNS (74),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (83),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (83),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (8),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (16),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (32),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   i486_memcpy,
   i486_memset,
@@ -467,6 +476,9 @@ struct processor_costs pentium_cost = {
   COSTS_N_INSNS (39),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (70),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (70),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
@@ -571,6 +583,9 @@ struct processor_costs lakemont_cost = {
   COSTS_N_INSNS (60),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (31),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (63),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (5),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (10),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (20),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
@@ -690,6 +705,9 @@ struct processor_costs pentiumpro_cost = {
   COSTS_N_INSNS (18),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (31),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (31),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentiumpro_memcpy,
   pentiumpro_memset,
@@ -800,6 +818,9 @@ struct processor_costs geode_cost = {
   COSTS_N_INSNS (47),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (54),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (54),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (6),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (24),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   geode_memcpy,
   geode_memset,
@@ -913,6 +934,9 @@ struct processor_costs k6_cost = {
   COSTS_N_INSNS (56),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (56),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (56),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (2),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (4),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (8),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   k6_memcpy,
   k6_memset,
@@ -1027,6 +1051,9 @@ struct processor_costs athlon_cost = {
   COSTS_N_INSNS (24),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (19),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (19),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (8),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (16),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   athlon_memcpy,
   athlon_memset,
@@ -1150,6 +1177,9 @@ struct processor_costs k8_cost = {
   COSTS_N_INSNS (20),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (19),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (27),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (8),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (16),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   k8_memcpy,
   k8_memset,
@@ -1281,6 +1311,9 @@ struct processor_costs amdfam10_cost = {
   COSTS_N_INSNS (20),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (19),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (27),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (8),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (16),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   amdfam10_memcpy,
   amdfam10_memset,
@@ -1405,6 +1438,9 @@ const struct processor_costs bdver_cost = {
   COSTS_N_INSNS (27),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (15),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (26),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (7),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (14),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   bdver_memcpy,
   bdver_memset,
@@ -1553,6 +1589,10 @@ struct processor_costs znver1_cost = {
   COSTS_N_INSNS (13),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (10),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (15),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  /* Real latency is 4, but for split regs multiply cost of half op by 2.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   /* Zen can execute 4 integer operations per cycle. FP operations take 3 cycles
      and it can execute 2 integer additions and 2 multiplications thus
      reassociation may make sense up to with of 6.  SPEC2k6 bencharks suggests
@@ -1712,6 +1752,9 @@ struct processor_costs znver2_cost = {
   COSTS_N_INSNS (13),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (10),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (15),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (10),			/* cost of 512bit VCVTPS2PD etc.  */
   /* Zen can execute 4 integer operations per cycle.  FP operations
      take 3 cycles and it can execute 2 integer additions and 2
      multiplications thus reassociation may make sense up to with of 6.
@@ -1847,6 +1890,9 @@ struct processor_costs znver3_cost = {
   COSTS_N_INSNS (13),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (10),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (15),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (10),			/* cost of 512bit VCVTPS2PD etc.  */
   /* Zen can execute 4 integer operations per cycle.  FP operations
      take 3 cycles and it can execute 2 integer additions and 2
      multiplications thus reassociation may make sense up to with of 6.
@@ -1984,6 +2030,10 @@ struct processor_costs znver4_cost = {
   COSTS_N_INSNS (13),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (15),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (21),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 256bit VCVTPS2PD etc.  */
+  /* Real latency is 6, but for split regs multiply cost of half op by 2.  */
+  COSTS_N_INSNS (10),			/* cost of 512bit VCVTPS2PD etc.  */
   /* Zen can execute 4 integer operations per cycle.  FP operations
      take 3 cycles and it can execute 2 integer additions and 2
      multiplications thus reassociation may make sense up to with of 6.
@@ -2135,6 +2185,9 @@ struct processor_costs znver5_cost = {
   COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
   /* DIVSD has throughtput 0.13 and latency 20.  */
   COSTS_N_INSNS (20),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 512bit VCVTPS2PD etc.  */
   /* Zen5 can execute:
       - integer ops: 6 per cycle, at most 3 multiplications.
 	latency 1 for additions, 3 for multiplications (pipelined)
@@ -2274,6 +2327,9 @@ struct processor_costs skylake_cost = {
   COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (12),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (2),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (4),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   skylake_memcpy,
   skylake_memset,
@@ -2403,6 +2459,9 @@ struct processor_costs icelake_cost = {
   COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (12),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (2),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   icelake_memcpy,
   icelake_memset,
@@ -2526,6 +2585,9 @@ struct processor_costs alderlake_cost = {
   COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (2),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
   alderlake_memcpy,
   alderlake_memset,
@@ -2642,6 +2704,9 @@ const struct processor_costs btver1_cost = {
   COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (48),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (7),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (14),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   btver1_memcpy,
   btver1_memset,
@@ -2755,6 +2820,9 @@ const struct processor_costs btver2_cost = {
   COSTS_N_INSNS (19),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (16),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (21),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (4),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (7),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (14),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   btver2_memcpy,
   btver2_memset,
@@ -2867,6 +2935,9 @@ struct processor_costs pentium4_cost = {
   COSTS_N_INSNS (38),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (23),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (38),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (10),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (20),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (40),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium4_memcpy,
   pentium4_memset,
@@ -2982,6 +3053,9 @@ struct processor_costs nocona_cost = {
   COSTS_N_INSNS (40),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (32),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (41),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (10),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (20),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (40),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   nocona_memcpy,
   nocona_memset,
@@ -3095,6 +3169,9 @@ struct processor_costs atom_cost = {
   COSTS_N_INSNS (60),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (31),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (63),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (6),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (24),			/* cost of 512bit VCVTPS2PD etc.  */
   2, 2, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   atom_memcpy,
   atom_memset,
@@ -3208,6 +3285,9 @@ struct processor_costs slm_cost = {
   COSTS_N_INSNS (69),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (20),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (35),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   slm_memcpy,
   slm_memset,
@@ -3335,6 +3415,9 @@ struct processor_costs tremont_cost = {
   COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
   tremont_memcpy,
   tremont_memset,
@@ -3448,6 +3531,9 @@ struct processor_costs intel_cost = {
   COSTS_N_INSNS (20),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (40),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (40),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (8),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (16),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (32),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   intel_memcpy,
   intel_memset,
@@ -3566,6 +3652,9 @@ struct processor_costs lujiazui_cost = {
   COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (32),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (60),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
   lujiazui_memcpy,
   lujiazui_memset,
@@ -3682,6 +3771,9 @@ struct processor_costs yongfeng_cost = {
   COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (20),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (35),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   4, 4, 4, 4,				/* reassoc int, fp, vec_int, vec_fp.  */
   yongfeng_memcpy,
   yongfeng_memset,
@@ -3798,6 +3890,9 @@ struct processor_costs shijidadao_cost = {
   COSTS_N_INSNS (14),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (11),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (6),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (12),			/* cost of 512bit VCVTPS2PD etc.  */
   4, 4, 4, 4,				/* reassoc int, fp, vec_int, vec_fp.  */
   shijidadao_memcpy,
   shijidadao_memset,
@@ -3922,6 +4017,9 @@ struct processor_costs generic_cost = {
   COSTS_N_INSNS (17),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (14),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (18),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (3),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (4),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (5),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 3, 3,				/* reassoc int, fp, vec_int, vec_fp.  */
   generic_memcpy,
   generic_memset,
@@ -4051,6 +4149,9 @@ struct processor_costs core_cost = {
   COSTS_N_INSNS (32),			/* cost of DIVSD instruction.  */
   COSTS_N_INSNS (30),			/* cost of SQRTSS instruction.  */
   COSTS_N_INSNS (58),			/* cost of SQRTSD instruction.  */
+  COSTS_N_INSNS (2),			/* cost of CVTSS2SD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 256bit VCVTPS2PD etc.  */
+  COSTS_N_INSNS (2),			/* cost of 512bit VCVTPS2PD etc.  */
   1, 4, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   core_memcpy,
   core_memset,
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 24a28dc..0d3d026 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -20678,6 +20678,9 @@ mips_option_override (void)
 	      "-mcompact-branches=never");
     }
 
+  if (is_micromips && TARGET_MSA)
+    error ("unsupported combination: %s", "-mmicromips -mmsa");
+
   /* Require explicit relocs for MIPS R6 onwards.  This enables simplification
      of the compact branch and jump support through the backend.  */
   if (!TARGET_EXPLICIT_RELOCS && mips_isa_rev >= 6)
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 2a3884c..d0919ec 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -1263,3 +1263,41 @@
   expand_crc_using_clmul (<SUBX:MODE>mode, <SUBX1:MODE>mode, operands);
   DONE;
 })
+
+;; If we have an XOR/IOR with a constant operand (C) and we can
+;; synthesize ~C more efficiently than C, then synthesize ~C and use
+;; xnor/orn instead.
+;;
+;; The same can be done for AND, but mvconst_internal's issues get in
+;; the way.  That's future work.
+(define_split
+  [(set (match_operand:X 0 "register_operand")
+	(any_or:X (match_operand:X 1 "register_operand")
+		  (match_operand:X 2 "const_int_operand")))
+   (clobber (match_operand:X 3 "register_operand"))]
+  "TARGET_ZBB
+   && (riscv_const_insns (operands[2], true)
+       > riscv_const_insns (GEN_INT (~INTVAL (operands[2])), true))"
+  [(const_int 0)]
+{
+  /* Get the inverted constant into the temporary register.  */
+  riscv_emit_move (operands[3], GEN_INT (~INTVAL (operands[2])));
+
+  /* For xnor, the NOT operation is in a different position.  So
+     we have to customize the split code we generate a bit.
+
+     It is expected that AND will be handled like IOR in the future.  */
+  if (<CODE> == XOR)
+    {
+      rtx x = gen_rtx_XOR (<X:MODE>mode, operands[1], operands[3]);
+      x = gen_rtx_NOT (<X:MODE>mode, x);
+      emit_insn (gen_rtx_SET (operands[0], x));
+    }
+  else
+    {
+      rtx x = gen_rtx_NOT (<X:MODE>mode, operands[3]);
+      x = gen_rtx_IOR (<X:MODE>mode, x, operands[1]);
+      emit_insn (gen_rtx_SET (operands[0], x));
+    }
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index d3656a7..bad59e2 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -13136,9 +13136,6 @@ parse_features_for_version (tree decl,
 					  DECL_SOURCE_LOCATION (decl));
   gcc_assert (parse_res);
 
-  if (arch_string != default_opts->x_riscv_arch_string)
-    free (CONST_CAST (void *, (const void *) arch_string));
-
   cl_target_option_restore (&global_options, &global_options_set,
 			    &cur_target);
 }
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 51eb64f..3ab4d76 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -2136,18 +2136,34 @@
 	     (match_operand 7 "const_int_operand")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-	  (vec_duplicate:V_VLS
-	    (match_operand:<VEL> 3 "direct_broadcast_operand"))
+	  ;; (vec_duplicate:V_VLS ;; wrapper activated by wrap_vec_dup below.
+	  (match_operand:<VEL> 3 "direct_broadcast_operand") ;; )
 	  (match_operand:V_VLS 2 "vector_merge_operand")))]
   "TARGET_VECTOR"
 {
   /* Transform vmv.v.x/vfmv.v.f (avl = 1) into vmv.s.x since vmv.s.x/vfmv.s.f
      has better chances to do vsetvl fusion in vsetvl pass.  */
+  bool wrap_vec_dup = true;
+  rtx vec_cst = NULL_RTX;
   if (riscv_vector::splat_to_scalar_move_p (operands))
     {
       operands[1] = riscv_vector::gen_scalar_move_mask (<VM>mode);
       operands[3] = force_reg (<VEL>mode, operands[3]);
     }
+  else if (immediate_operand (operands[3], <VEL>mode)
+	   && (vec_cst = gen_const_vec_duplicate (<MODE>mode, operands[3]))
+	   && (/* -> pred_broadcast<mode>_zero */
+	       (vector_least_significant_set_mask_operand (operands[1],
+							   <VM>mode)
+		&& vector_const_0_operand (vec_cst, <MODE>mode))
+	       || (/* pred_broadcast<mode>_imm */
+		   vector_all_trues_mask_operand (operands[1], <VM>mode)
+		   && vector_const_int_or_double_0_operand (vec_cst,
+							    <MODE>mode))))
+    {
+      operands[3] = vec_cst;
+      wrap_vec_dup = false;
+    }
   /* Handle vmv.s.x instruction (Wb1 mask) which has memory scalar.  */
   else if (satisfies_constraint_Wdm (operands[3]))
     {
@@ -2191,6 +2207,8 @@
     ;
   else
     operands[3] = force_reg (<VEL>mode, operands[3]);
+  if (wrap_vec_dup)
+    operands[3] = gen_rtx_VEC_DUPLICATE (<MODE>mode, operands[3]);
 })
 
 (define_insn_and_split "*pred_broadcast<mode>"
diff --git a/gcc/config/sh/sh-modes.def b/gcc/config/sh/sh-modes.def
index 80650b4..e31ae69 100644
--- a/gcc/config/sh/sh-modes.def
+++ b/gcc/config/sh/sh-modes.def
@@ -17,6 +17,12 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+/* SH has the same reversed quiet bit as MIPS.  */
+RESET_FLOAT_FORMAT (SF, mips_single_format);
+RESET_FLOAT_FORMAT (DF, mips_double_format);
+/* TFmode: IEEE quad floating point (software).  */
+FLOAT_MODE (TF, 16, mips_quad_format);
+
 /* Vector modes.  */
 VECTOR_MODE  (INT, QI, 2);    /*                 V2QI */
 VECTOR_MODES (INT, 4);        /*            V4QI V2HI */
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 644b36a..e85a710 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,31 @@
+2025-04-21  Jason Merrill  <jason@redhat.com>
+
+	* constexpr.cc (cxx_eval_outermost_constant_expr): Move
+	verify_constant later.
+
+2025-04-21  Jason Merrill  <jason@redhat.com>
+
+	PR c++/118775
+	* constexpr.cc (cxx_eval_call_expression): Add assert.
+	(fold_to_constant): Handle processing_template_decl.
+	* init.cc (build_new_1): Use fold_to_constant.
+
+2025-04-21  Jason Merrill  <jason@redhat.com>
+
+	PR c++/99456
+	* constexpr.cc (cxx_eval_constant_expression): Check strict
+	instead of manifestly_const_eval.
+	(maybe_constant_init_1): Be strict for static constexpr vars.
+
+2025-04-19  Jason Merrill  <jason@redhat.com>
+
+	* coroutines.cc (coro_build_expr_stmt)
+	(coro_build_cvt_void_expr_stmt): Remove.
+	(build_actor_fn): Use finish_expr_stmt.
+	* semantics.cc (finish_expr_stmt): Avoid wrapping statement in
+	EXPR_STMT.
+	(finish_stmt_expr_expr): Add comment.
+
 2025-04-17  Jason Merrill  <jason@redhat.com>
 
 	* constexpr.cc (is_valid_constexpr_fn): Improve diagnostic.
diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index f56c5c4..8a11e62 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -2956,12 +2956,11 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
 	  gcc_assert (arg0);
 	  if (new_op_p)
 	    {
-	      /* FIXME: We should not get here; the VERIFY_CONSTANT above
-		 should have already caught it.  But currently a conversion
-		 from pointer type to arithmetic type is only considered
-		 non-constant for CONVERT_EXPRs, not NOP_EXPRs.  */
 	      if (!tree_fits_uhwi_p (arg0))
 		{
+		  /* We should not get here; the VERIFY_CONSTANT above
+		     should have already caught it.  */
+		  gcc_checking_assert (false);
 		  if (!ctx->quiet)
 		    error_at (loc, "cannot allocate array: size not constant");
 		  *non_constant_p = true;
@@ -8479,7 +8478,7 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, tree t,
 	if (TREE_CODE (t) == CONVERT_EXPR
 	    && ARITHMETIC_TYPE_P (type)
 	    && INDIRECT_TYPE_P (TREE_TYPE (op))
-	    && ctx->manifestly_const_eval == mce_true)
+	    && ctx->strict)
 	  {
 	    if (!ctx->quiet)
 	      error_at (loc,
@@ -9228,11 +9227,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
   if (r == void_node && !constexpr_dtor && ctx.ctor)
     r = ctx.ctor;
 
-  if (!constexpr_dtor)
-    verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p);
-  else
-    DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true;
-
   unsigned int i;
   tree cleanup;
   /* Evaluate the cleanups.  */
@@ -9251,15 +9245,6 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
       non_constant_p = true;
     }
 
-  if (TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r))
-    {
-      if (!allow_non_constant)
-	error ("%qE is not a constant expression because it refers to "
-	       "an incompletely initialized variable", t);
-      TREE_CONSTANT (r) = false;
-      non_constant_p = true;
-    }
-
   if (!non_constant_p && cxx_dialect >= cxx20
       && !global_ctx.heap_vars.is_empty ())
     {
@@ -9316,6 +9301,21 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
       non_constant_p = true;
     }
 
+  if (!non_constant_p && !constexpr_dtor)
+    verify_constant (r, allow_non_constant, &non_constant_p, &overflow_p);
+
+  /* After verify_constant because reduced_constant_expression_p can unset
+     CONSTRUCTOR_NO_CLEARING.  */
+  if (!non_constant_p
+      && TREE_CODE (r) == CONSTRUCTOR && CONSTRUCTOR_NO_CLEARING (r))
+    {
+      if (!allow_non_constant)
+	error ("%qE is not a constant expression because it refers to "
+	       "an incompletely initialized variable", t);
+      TREE_CONSTANT (r) = false;
+      non_constant_p = true;
+    }
+
   if (non_constant_p)
     /* If we saw something bad, go back to our argument.  The wrapping below is
        only for the cases of TREE_CONSTANT argument or overflow.  */
@@ -9332,13 +9332,17 @@ cxx_eval_outermost_constant_expr (tree t, bool allow_non_constant,
 
   if (non_constant_p && !allow_non_constant)
     return error_mark_node;
-  else if (constexpr_dtor)
-    return r;
   else if (non_constant_p && TREE_CONSTANT (r))
     r = mark_non_constant (r);
   else if (non_constant_p)
     return t;
 
+  if (constexpr_dtor)
+    {
+      DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (object) = true;
+      return r;
+    }
+
   /* Check we are not trying to return the wrong type.  */
   if (!same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (r)))
     {
@@ -9490,6 +9494,9 @@ fold_simple (tree t)
 tree
 fold_to_constant (tree t)
 {
+  if (processing_template_decl)
+    return t;
+
   tree r = fold (t);
   if (CONSTANT_CLASS_P (r) && !TREE_OVERFLOW (r))
     return r;
@@ -9747,16 +9754,26 @@ maybe_constant_init_1 (tree t, tree decl, bool allow_non_constant,
     {
       /* [basic.start.static] allows constant-initialization of variables with
 	 static or thread storage duration even if it isn't required, but we
-	 shouldn't bend the rules the same way for automatic variables.  */
+	 shouldn't bend the rules the same way for automatic variables.
+
+	 But still enforce the requirements of constexpr/constinit.
+	 [dcl.constinit] "If a variable declared with the constinit specifier
+	 has dynamic initialization, the program is ill-formed, even if the
+	 implementation would perform that initialization as a static
+	 initialization."  */
       bool is_static = (decl && DECL_P (decl)
 			&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)));
+      bool strict = (!is_static
+		     || (decl && DECL_P (decl)
+			 && (DECL_DECLARED_CONSTEXPR_P (decl)
+			     || DECL_DECLARED_CONSTINIT_P (decl))));
       if (is_static)
 	manifestly_const_eval = mce_true;
 
       if (cp_unevaluated_operand && manifestly_const_eval != mce_true)
 	return fold_to_constant (t);
 
-      t = cxx_eval_outermost_constant_expr (t, allow_non_constant, !is_static,
+      t = cxx_eval_outermost_constant_expr (t, allow_non_constant, strict,
 					    manifestly_const_eval,
 					    false, decl);
     }
diff --git a/gcc/cp/coroutines.cc b/gcc/cp/coroutines.cc
index b92d09f..743da06 100644
--- a/gcc/cp/coroutines.cc
+++ b/gcc/cp/coroutines.cc
@@ -1852,21 +1852,6 @@ coro_build_frame_access_expr (tree coro_ref, tree member_id, bool preserve_ref,
   return expr;
 }
 
-/* Helpers to build EXPR_STMT and void-cast EXPR_STMT, common ops.  */
-
-static tree
-coro_build_expr_stmt (tree expr, location_t loc)
-{
-  return maybe_cleanup_point_expr_void (build_stmt (loc, EXPR_STMT, expr));
-}
-
-static tree
-coro_build_cvt_void_expr_stmt (tree expr, location_t loc)
-{
-  tree t = build1 (CONVERT_EXPR, void_type_node, expr);
-  return coro_build_expr_stmt (t, loc);
-}
-
 /* Helpers to build an artificial var, with location LOC, NAME and TYPE, in
    CTX, and with initializer INIT.  */
 
@@ -2582,8 +2567,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
   tree hfa = build_new_method_call (ash, hfa_m, &args, NULL_TREE, LOOKUP_NORMAL,
 				    NULL, tf_warning_or_error);
   r = cp_build_init_expr (ash, hfa);
-  r = coro_build_cvt_void_expr_stmt (r, loc);
-  add_stmt (r);
+  finish_expr_stmt (r);
   release_tree_vector (args);
 
   /* Now we know the real promise, and enough about the frame layout to
@@ -2678,8 +2662,7 @@ build_actor_fn (location_t loc, tree coro_frame_type, tree actor, tree fnbody,
      we must tail call them.  However, some targets do not support indirect
      tail calls to arbitrary callees.  See PR94359.  */
   CALL_EXPR_TAILCALL (resume) = true;
-  resume = coro_build_cvt_void_expr_stmt (resume, loc);
-  add_stmt (resume);
+  finish_expr_stmt (resume);
 
   r = build_stmt (loc, RETURN_EXPR, NULL);
   gcc_checking_assert (maybe_cleanup_point_expr_void (r) == r);
diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index e589e45..062a493 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3405,7 +3405,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts,
 	errval = throw_bad_array_new_length ();
       if (outer_nelts_check != NULL_TREE)
 	size = build3 (COND_EXPR, sizetype, outer_nelts_check, size, errval);
-      size = cp_fully_fold (size);
+      size = fold_to_constant (size);
       /* Create the argument list.  */
       vec_safe_insert (*placement, 0, size);
       /* Do name-lookup to find the appropriate operator.  */
@@ -3462,7 +3462,7 @@ build_new_1 (vec<tree, va_gc> **placement, tree type, tree nelts,
 	    outer_nelts_check = NULL_TREE;
 	}
 
-      size = cp_fully_fold (size);
+      size = fold_to_constant (size);
       /* If size is zero e.g. due to type having zero size, try to
 	 preserve outer_nelts for constant expression evaluation
 	 purposes.  */
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 7f23efd..1aa35d3 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -1180,10 +1180,13 @@ finish_expr_stmt (tree expr)
         expr = error_mark_node;
 
       /* Simplification of inner statement expressions, compound exprs,
-	 etc can result in us already having an EXPR_STMT.  */
+	 etc can result in us already having an EXPR_STMT or other statement
+	 tree.  Don't wrap them in EXPR_STMT.  */
       if (TREE_CODE (expr) != CLEANUP_POINT_EXPR)
 	{
-	  if (TREE_CODE (expr) != EXPR_STMT)
+	  if (TREE_CODE (expr) != EXPR_STMT
+	      && !STATEMENT_CLASS_P (expr)
+	      && TREE_CODE (expr) != STATEMENT_LIST)
 	    expr = build_stmt (loc, EXPR_STMT, expr);
 	  expr = maybe_cleanup_point_expr_void (expr);
 	}
@@ -3082,6 +3085,7 @@ finish_stmt_expr_expr (tree expr, tree stmt_expr)
 	}
       else if (processing_template_decl)
 	{
+	  /* Not finish_expr_stmt because we don't want convert_to_void.  */
 	  expr = build_stmt (input_location, EXPR_STMT, expr);
 	  expr = add_stmt (expr);
 	  /* Mark the last statement so that we can recognize it as such at
diff --git a/gcc/except.cc b/gcc/except.cc
index 205811c..0fe1e09 100644
--- a/gcc/except.cc
+++ b/gcc/except.cc
@@ -2949,7 +2949,14 @@ switch_to_exception_section (const char * ARG_UNUSED (fnname))
 {
   section *s;
 
-  if (exception_section)
+  if (exception_section
+  /* Don't use the cached section for comdat if it will be different. */
+#ifdef HAVE_LD_EH_GC_SECTIONS
+      && !(targetm_common.have_named_sections
+	   && DECL_COMDAT_GROUP (current_function_decl)
+	   && HAVE_COMDAT_GROUP)
+#endif
+     )
     s = exception_section;
   else
     {
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 1c45bdb..56325a9 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-19  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/119836
+	* resolve.cc (check_pure_function): Fix checking for
+	an impure subprogram within a DO CONCURRENT construct.
+	(pure_subroutine): Ditto.
+
 2025-04-16  Harald Anlauf  <anlauf@gmx.de>
 
 	PR fortran/106948
diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 2ecbd50..f03708e 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -3260,14 +3260,30 @@ static bool check_pure_function (gfc_expr *e)
      gfc_do_concurrent_flag = 0 when the check for an impure function
      occurs.  Check the stack to see if the source code has a nested
      BLOCK construct.  */
+
   for (stack = cs_base; stack; stack = stack->prev)
     {
-      if (stack->current->op == EXEC_BLOCK) saw_block = true;
+      if (!saw_block && stack->current->op == EXEC_BLOCK)
+	{
+	  saw_block = true;
+	  continue;
+	}
+
       if (saw_block && stack->current->op == EXEC_DO_CONCURRENT)
 	{
-	  gfc_error ("Reference to impure function at %L inside a "
-		     "DO CONCURRENT", &e->where);
-	  return false;
+	  bool is_pure;
+	  is_pure = (e->value.function.isym
+		     && (e->value.function.isym->pure
+			 || e->value.function.isym->elemental))
+		    || (e->value.function.esym
+			&& (e->value.function.esym->attr.pure
+			    || e->value.function.esym->attr.elemental));
+	  if (!is_pure)
+	    {
+	      gfc_error ("Reference to impure function at %L inside a "
+			 "DO CONCURRENT", &e->where);
+	      return false;
+	    }
 	}
     }
 
@@ -3663,16 +3679,29 @@ pure_subroutine (gfc_symbol *sym, const char *name, locus *loc)
 
   /* A BLOCK construct within a DO CONCURRENT construct leads to
      gfc_do_concurrent_flag = 0 when the check for an impure subroutine
-     occurs.  Check the stack to see if the source code has a nested
-     BLOCK construct.  */
+     occurs.  Walk up the stack to see if the source code has a nested
+     construct.  */
+
   for (stack = cs_base; stack; stack = stack->prev)
     {
-      if (stack->current->op == EXEC_BLOCK) saw_block = true;
+      if (stack->current->op == EXEC_BLOCK)
+	{
+	  saw_block = true;
+	  continue;
+	}
+
       if (saw_block && stack->current->op == EXEC_DO_CONCURRENT)
 	{
-	  gfc_error ("Subroutine call at %L in a DO CONCURRENT block "
-		     "is not PURE", loc);
-	  return false;
+
+	  bool is_pure = true;
+	  is_pure = sym->attr.pure || sym->attr.elemental;
+
+	  if (!is_pure)
+	    {
+	      gfc_error ("Subroutine call at %L in a DO CONCURRENT block "
+			 "is not PURE", loc);
+	      return false;
+	    }
 	}
     }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4cdc9c1..4c219bd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,78 @@
+2025-04-21  Jason Merrill  <jason@redhat.com>
+
+	PR c++/118775
+	* g++.dg/cpp2a/constexpr-new24.C: Adjust diagnostic.
+
+2025-04-21  Andrew Bennett  <andrew.bennett@imgtec.com>
+
+	* gcc.dg/memcpy-4.c: Remove mips specific code.
+	* gcc.target/mips/memcpy-2.c: New test.
+
+2025-04-21  Matthew Fortune  <matthew.fortune@imgtec.com>
+
+	* gcc.target/mips/clear-cache-1.c: Also allow jrc.
+
+2025-04-21  Matthew Fortune  <matthew.fortune@imgtec.com>
+
+	* gcc.dg/tree-ssa/ssa-dom-cse-2.c: Do not check output for
+	MIPS lp64 abi.
+
+2025-04-21  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR middle-end/119507
+	* g++.dg/eh/pr119507.C: New test.
+
+2025-04-21  hongtao.liu  <hongtao.liu@intel.com>
+
+	* gcc.target/i386/recip-vec-divf-fma.c: New test.
+
+2025-04-20  H.J. Lu  <hjl.tools@gmail.com>
+
+	PR target/117863
+	* gcc.dg/rtl/i386/vector_eq-2.c: New test.
+	* gcc.dg/rtl/i386/vector_eq-3.c: Likewise.
+
+2025-04-19  Thomas Schwinge  <tschwinge@baylibre.com>
+
+	PR testsuite/119508
+	* rust/compile/nr2/compile.exp: Disable parallel testing.
+
+2025-04-19  Jeff Law  <jlaw@ventanamicro.com>
+
+	PR target/118410
+	* gcc.target/riscv/pr118410-1.c: New test.
+	* gcc.target/riscv/pr118410-2.c: Likewise.
+
+2025-04-19  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	* gcc.dg/pr118947-1.c: Use 1025 as the size of the buf.
+	* gcc.dg/pr78408-3.c: Likewise.
+
+2025-04-19  Andrew Pinski  <quic_apinski@quicinc.com>
+
+	PR rtl-optimization/111949
+	* gcc.target/aarch64/bic-1.c: New test.
+
+2025-04-19  Jiaxun Yang  <jiaxun.yang@flygoat.com>
+
+	PR target/111814
+	* gcc.target/sh/pr111814.c: New test.
+
+2025-04-19  Maciej W. Rozycki  <macro@orcam.me.uk>
+
+	* gcc.target/alpha/memcpy-nested-offset-long.c: New file.
+	* gcc.target/alpha/memcpy-nested-offset-quad.c: New file.
+
+2025-04-19  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/119836
+	* gfortran.dg/do_concurrent_all_clauses.f90: Remove invalid
+	dg-error test.
+	* gfortran.dg/pr119836_1.f90: New test.
+	* gfortran.dg/pr119836_2.f90: New test.
+	* gfortran.dg/pr119836_3.f90: New test.
+	* gfortran.dg/pr119836_4.f90: New test.
+
 2025-04-18  Thomas Schwinge  <tschwinge@baylibre.com>
 
 	PR cobol/119818
diff --git a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
index ee62f18..17c9f54 100644
--- a/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
+++ b/gcc/testsuite/g++.dg/cpp2a/constexpr-new24.C
@@ -6,14 +6,14 @@ int a;
 constexpr char *
 f1 ()
 {
-  constexpr auto p = new char[(long int) &a]; // { dg-error "size not constant" }
+  constexpr auto p = new char[(long int) &a]; // { dg-error "conversion from pointer" }
   return p;
 }
 
 constexpr char *
 f2 ()
 {
-  auto p = new char[(long int) &a];  // { dg-error "size not constant" }
+  auto p = new char[(long int) &a];  // { dg-error "conversion from pointer" }
   return p;
 }
 
diff --git a/gcc/testsuite/g++.dg/eh/pr119507.C b/gcc/testsuite/g++.dg/eh/pr119507.C
new file mode 100644
index 0000000..c68536f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/eh/pr119507.C
@@ -0,0 +1,19 @@
+// { dg-do compile { target comdat_group } }
+// ARM EABI has its own handling of exception data and does not use gcc_except_table
+// { dg-skip-if "!TARGET_EXCEPTION_DATA" { arm_eabi } }
+// Force off function sections
+// Force on exceptions
+// { dg-options "-fno-function-sections -fexceptions" }
+// PR middle-end/119507
+
+
+inline int comdat() { try { throw 1; } catch (int) { return 1; } return 0; }
+int another_func_with_exception() { try { throw 1; } catch (int) { return 1; } return 0; }
+inline int comdat1() { try { throw 1; } catch (int) { return 1; } return 0; }
+int foo() { return comdat() + comdat1(); }
+
+// Make sure gcc puts the exception tables for both comdat and comdat1 in their own sections
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z6comdatv" 1 } }
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table._Z7comdat1v" 1 } }
+// There should be 3 exception tables in total.
+// { dg-final { scan-assembler-times ".section\[\t \]\[^\n\]*.gcc_except_table" 3 } }
diff --git a/gcc/testsuite/gcc.dg/memcpy-4.c b/gcc/testsuite/gcc.dg/memcpy-4.c
index 4c726f0..b17b369 100644
--- a/gcc/testsuite/gcc.dg/memcpy-4.c
+++ b/gcc/testsuite/gcc.dg/memcpy-4.c
@@ -1,13 +1,8 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-rtl-expand" } */
+/* { dg-options "-O2" } */
 
-#ifdef __mips
-__attribute__((nomips16))
-#endif
 void
 f1 (char *p)
 {
   __builtin_memcpy (p, "12345", 5);
 }
-
-/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" { target mips*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/pr118947-1.c b/gcc/testsuite/gcc.dg/pr118947-1.c
index 70b7f80..8733e8d 100644
--- a/gcc/testsuite/gcc.dg/pr118947-1.c
+++ b/gcc/testsuite/gcc.dg/pr118947-1.c
@@ -6,10 +6,10 @@
 void* aaa();
 void* bbb()
 {
-    char buf[32] = {};
+    char buf[1025] = {};
     /*  Tha call to aaa should not matter and clobber buf. */
     void* ret = aaa();
-    __builtin_memcpy(ret, buf, 32);
+    __builtin_memcpy(ret, buf, sizeof(buf));
     return ret;
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr78408-3.c b/gcc/testsuite/gcc.dg/pr78408-3.c
index 3de90d0..5ea5458 100644
--- a/gcc/testsuite/gcc.dg/pr78408-3.c
+++ b/gcc/testsuite/gcc.dg/pr78408-3.c
@@ -7,8 +7,8 @@ void* aaa();
 void* bbb()
 {
     void* ret = aaa();
-    char buf[32] = {};
-    __builtin_memcpy(ret, buf, 32);
+    char buf[1025] = {};
+    __builtin_memcpy(ret, buf, sizeof(buf));
     return ret;
 }
 
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
new file mode 100644
index 0000000..871d489
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+      (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+      (cinsn 5 (set (reg:V4SI <2>)
+		    (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
+      (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+      (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+      (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+      (cinsn 5 (set (reg:V8SI <2>)
+		    (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>))))
+      (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+      (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+      (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+      (cinsn 5 (set (reg:V2DI <2>)
+		    (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>))))
+      (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+      (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
new file mode 100644
index 0000000..276c4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V4SI <1>)
+		    (mem:V4SI (reg:SI di) [0 ptr S128 A128])))
+      (cinsn 4 (set (reg:V4SI <2>)
+		    (eq:V4SI (reg:V4SI <1>)
+			     (mem:V4SI (reg:SI di) [0 ptr S128 A128]))))
+      (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+      (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V8SI <1>)
+		    (mem:V8SI (reg:SI di) [0 ptr S256 A256])))
+      (cinsn 4 (set (reg:V8SI <2>)
+		    (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256])
+			     (reg:V8SI <1>))))
+      (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+      (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+  (insn-chain
+    (block 2
+      (edge-from entry (flags "FALLTHRU"))
+      (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+      (cnote 2 NOTE_INSN_FUNCTION_BEG)
+      (cinsn 3 (set (reg:V2DI <1>)
+		    (mem:V2DI (reg:SI di) [0 ptr S128 A128])))
+      (cinsn 4 (set (reg:V2DI <2>)
+		    (eq:V2DI (reg:V2DI <1>)
+			     (mem:V2DI (reg:SI di) [0 ptr S128 A128]))))
+      (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+      (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+      (edge-to exit (flags "FALLTHRU"))
+    )
+  )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
index a879d30..6fa52f6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c
@@ -27,4 +27,4 @@ foo ()
    but the loop reads only one element at a time, and DOM cannot resolve these.
    The same happens on powerpc depending on the SIMD support available.  */
 
-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
+/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* mmix-knuth-mmixware } || { { { lp64 && { mips*-*-* powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-1.c b/gcc/testsuite/gcc.target/aarch64/bic-1.c
new file mode 100644
index 0000000..65e1514
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-1.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/* PR rtl-optimization/111949 */
+
+/*
+**func1:
+**	bic	w([0-9]+), w0, w1
+**	and	w0, w\1, 1
+**      ret
+*/
+
+unsigned func1(unsigned a, bool b)
+{
+        int c = a & b;
+        return (c ^ a)&1;
+}
+
+/*
+**func2:
+**	bic	w([0-9]+), w1, w0
+**	and	w0, w\1, 255
+**      ret
+*/
+unsigned func2(bool a, bool b)
+{
+  return ~a & b;
+}
+
+/*
+**func3:
+**	bic	w([0-9]+), w1, w0
+**	and	w0, w\1, 1
+**      ret
+*/
+bool func3(bool a, unsigned char b)
+{
+  return !a & b;
+}
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c
new file mode 100644
index 0000000..631d14f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-long.c
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+  {
+    int32_t l[8];
+  }
+val;
+
+typedef struct
+  {
+    int32_t l[2];
+    val v;
+  }
+tre;
+
+typedef struct
+  {
+    int32_t l[3];
+    tre t;
+  }
+due;
+
+typedef struct
+  {
+    val v;
+    int64_t q;
+    int32_t l[2];
+    due d;
+  }
+uno;
+
+void
+memcpy_nested_offset_long (uno *u)
+{
+  u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+	ldq $4,0($16)
+	ldq $3,8($16)
+	ldq $2,16($16)
+	srl $4,32,$7
+	ldq $1,24($16)
+	srl $3,32,$6
+	stl $4,68($16)
+	srl $2,32,$5
+	stl $7,72($16)
+	srl $1,32,$4
+	stl $3,76($16)
+	stl $6,80($16)
+	stl $2,84($16)
+	stl $5,88($16)
+	stl $1,92($16)
+	stl $4,96($16)
+
+   that is with four quadword loads at offsets 0, 8, 16, 24 each and
+   eight longword stores at offsets 68, 72, 76, 80, 84, 88, 92, 96 each.  */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,68\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,76\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,84\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,92\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstl\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c
new file mode 100644
index 0000000..1d2227e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/alpha/memcpy-nested-offset-quad.c
@@ -0,0 +1,64 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+typedef unsigned int __attribute__ ((mode (DI))) int64_t;
+typedef unsigned int __attribute__ ((mode (SI))) int32_t;
+
+typedef union
+  {
+    int32_t l[8];
+  }
+val;
+
+typedef struct
+  {
+    int32_t l[2];
+    val v;
+  }
+tre;
+
+typedef struct
+  {
+    int32_t l[3];
+    tre t;
+  }
+due;
+
+typedef struct
+  {
+    val v;
+    int64_t q;
+    int32_t l[3];
+    due d;
+  }
+uno;
+
+void
+memcpy_nested_offset_quad (uno *u)
+{
+  u->d.t.v = u->v;
+}
+
+/* Expect assembly such as:
+
+	ldq $4,0($16)
+	ldq $3,8($16)
+	ldq $2,16($16)
+	ldq $1,24($16)
+	stq $4,72($16)
+	stq $3,80($16)
+	stq $2,88($16)
+	stq $1,96($16)
+
+   that is with four quadword loads at offsets 0, 8, 16, 24 each
+   and four quadword stores at offsets 72, 80, 88, 96 each.  */
+
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,0\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,8\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,16\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sldq\\s\\\$\[0-9\]+,24\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,72\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,80\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,88\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq\\s\\\$\[0-9\]+,96\\\(\\\$16\\\)\\s" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
new file mode 100644
index 0000000..ad9e07b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfma -mavx2" } */
+/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a  */
+
+v4sf
+foo (v4sf a, v4sf b)
+{
+    return a / b;
+}
diff --git a/gcc/testsuite/gcc.target/mips/clear-cache-1.c b/gcc/testsuite/gcc.target/mips/clear-cache-1.c
index f1554f5..cd11c66 100644
--- a/gcc/testsuite/gcc.target/mips/clear-cache-1.c
+++ b/gcc/testsuite/gcc.target/mips/clear-cache-1.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-msynci isa_rev>=2" } */
 /* { dg-final { scan-assembler "\tsynci\t" } } */
-/* { dg-final { scan-assembler "\tjr.hb\t" } } */
+/* { dg-final { scan-assembler "\tjrc?.hb\t" } } */
 /* { dg-final { scan-assembler-not "_flush_cache|mips_sync_icache|_cacheflush" } } */
 
 NOMIPS16 void f()
diff --git a/gcc/testsuite/gcc.target/mips/memcpy-2.c b/gcc/testsuite/gcc.target/mips/memcpy-2.c
new file mode 100644
index 0000000..df0cd18
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/memcpy-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "isa_rev<=5 -fdump-rtl-expand" } */
+/* { dg-skip-if "code quality test" { *-*-* } { "-Os" } { "" } } */
+
+__attribute__((nomips16))
+void
+f1 (char *p)
+{
+  __builtin_memcpy (p, "12345", 5);
+}
+
+/* { dg-final { scan-rtl-dump "mem/u.*mem/u" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/power11-3.c b/gcc/testsuite/gcc.target/powerpc/power11-3.c
index fa1aedd..56bf881 100644
--- a/gcc/testsuite/gcc.target/powerpc/power11-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/power11-3.c
@@ -1,5 +1,6 @@
 /* { dg-do compile }  */
 /* { dg-options "-mdejagnu-cpu=power8 -O2" }  */
+/* { dg-require-ifunc "" } */
 
 /* Check if we can set the power11 target via a target_clones attribute.  */
 
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-1.c b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
new file mode 100644
index 0000000..4a8b847
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long orlow(long x) { return x | ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "orn\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/pr118410-2.c b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
new file mode 100644
index 0000000..b63a1d9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr118410-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+/* { dg-options "-march=rv64gcb -mabi=lp64d" { target { rv64} } } */
+/* { dg-options "-march=rv32gcb -mabi=ilp32" { target { rv32} } } */
+
+long xorlow(long x) { return x ^ ((1L << 24) - 1); }
+
+/* { dg-final { scan-assembler-times "xnor\t" 1 } } */
+/* { dg-final { scan-assembler-not "addi\t" } } */
diff --git a/gcc/testsuite/gcc.target/sh/pr111814.c b/gcc/testsuite/gcc.target/sh/pr111814.c
new file mode 100644
index 0000000..a88e5d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr111814.c
@@ -0,0 +1,7 @@
+/* Verify that __builtin_nan("") produces a constant that matches
+   the architecture specification.  */
+/* { dg-do compile } */
+
+double d = __builtin_nan ("");
+
+/* { dg-final { scan-assembler "\t.long\t-1\n\t.long\t2146959359\n" } } */
diff --git a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90 b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
index 0c8a6ad..a7fa7c3 100644
--- a/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
+++ b/gcc/testsuite/gfortran.dg/do_concurrent_all_clauses.f90
@@ -18,7 +18,7 @@ program do_concurrent_all_clauses
       squared = i * i
       arr(i) = temp2 + squared
       sum = sum + arr(i)
-      max_val = max(max_val, arr(i)) ! { dg-error "Reference to impure function" }
+      max_val = max(max_val, arr(i))
     end block
   end do
   print *, arr, sum, max_val
diff --git a/gcc/testsuite/gfortran.dg/pr119836_1.f90 b/gcc/testsuite/gfortran.dg/pr119836_1.f90
new file mode 100644
index 0000000..984e2d0
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_1.f90
@@ -0,0 +1,18 @@
+!
+! { dg-do run }
+!
+! PR fortran/119836
+!
+program p
+   implicit none
+   integer, parameter :: n = 4
+   integer :: i
+   integer :: y(n), x(n)
+   do concurrent (i=1:n)
+      x(i) = shiftl (i,1)     ! accepted
+      block
+         y(i) = shiftl (i,1)  ! wrongly rejected
+      end block
+   end do
+   if (any(x /= y)) stop 1
+end program p
diff --git a/gcc/testsuite/gfortran.dg/pr119836_2.f90 b/gcc/testsuite/gfortran.dg/pr119836_2.f90
new file mode 100644
index 0000000..5e2d0c9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_2.f90
@@ -0,0 +1,21 @@
+!
+! { dg-do compile }
+!
+! PR fortran/119836
+!
+! Although intrinsic functions contained within the Fortran standard
+! are pure procedures, many of the additional intrinsic functions
+! supplied in libgfortran are impure.  RAND() is one such function.
+!
+program foo
+   implicit none
+   integer i
+   real x(4)
+   do concurrent (i=1:4)
+      x = rand()     ! { dg-error "Reference to impure function" }
+      block
+         x = rand()  ! { dg-error "Reference to impure function" }
+      end block
+   end do
+   print *, x
+end program foo
diff --git a/gcc/testsuite/gfortran.dg/pr119836_3.f90 b/gcc/testsuite/gfortran.dg/pr119836_3.f90
new file mode 100644
index 0000000..69a5fcf
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_3.f90
@@ -0,0 +1,30 @@
+!
+! { dg-do run }
+!
+! PR fortran/119836
+!
+program p
+   implicit none
+   integer, parameter :: n = 4
+   integer :: i
+   integer :: y(n), x(n)
+   x = [(i,i=1,n)]
+   do concurrent (i=1:n)
+      call bar(x, y)
+   end do
+   if (any(x /= y)) stop 1
+   x = 2 * x
+   do concurrent (i=1:n)
+      block
+         call bar(x, y)
+      end block
+   end do
+   if (any(x /= y)) stop 1
+
+   contains
+      elemental subroutine bar(x, y)
+         integer, intent(in) :: x
+         integer, intent(out) :: y
+         y = x
+      end subroutine
+end program p
diff --git a/gcc/testsuite/gfortran.dg/pr119836_4.f90 b/gcc/testsuite/gfortran.dg/pr119836_4.f90
new file mode 100644
index 0000000..dc6f72b
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr119836_4.f90
@@ -0,0 +1,30 @@
+!
+! { dg-do compile }
+!
+! PR fortran/119836
+!
+program p
+   implicit none
+   integer, parameter :: n = 4
+   integer :: i
+   integer :: y(n), x(n)
+   x = [(i,i=1,n)]
+   do concurrent (i=1:n)
+      call bar(x, y)       ! { dg-error "Subroutine call" }
+   end do
+   if (any(x /= y)) stop 1
+   x = 2 * x
+   do concurrent (i=1:n)
+      block
+         call bar(x, y)    ! { dg-error "Subroutine call" }
+      end block
+   end do
+   if (any(x /= y)) stop 1
+
+   contains
+      subroutine bar(x, y)
+         integer, intent(in) :: x(:)
+         integer, intent(out) :: y(:)
+         y = x
+      end subroutine
+end program p
diff --git a/gcc/testsuite/rust/compile/nr2/compile.exp b/gcc/testsuite/rust/compile/nr2/compile.exp
index 4d91dd0..9e15cdd 100644
--- a/gcc/testsuite/rust/compile/nr2/compile.exp
+++ b/gcc/testsuite/rust/compile/nr2/compile.exp
@@ -19,6 +19,15 @@
 # Load support procs.
 load_lib rust-dg.exp
 
+# These tests don't run runtest_file_p consistently if it
+# doesn't return the same values, so disable parallelization
+# of this *.exp file.  The first parallel runtest to reach
+# this will run all the tests serially.
+if ![gcc_parallel_test_run_p compile] {
+    return
+}
+gcc_parallel_test_enable 0
+
 # Initialize `dg'.
 dg-init
 
@@ -136,3 +145,5 @@ namespace eval rust-nr2-ns {
 
 # All done.
 dg-finish
+
+gcc_parallel_test_enable 1
diff --git a/gcc/vec.h b/gcc/vec.h
index 915df06..eae4b0f 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -2395,11 +2395,11 @@ public:
   array_slice (vec<OtherT, A, vl_embed> *v)
     : m_base (v ? v->address () : nullptr), m_size (v ? v->length () : 0) {}
 
-  iterator begin () { return m_base; }
-  iterator end () { return m_base + m_size; }
+  iterator begin () {  gcc_checking_assert (is_valid ()); return m_base; }
+  iterator end () {  gcc_checking_assert (is_valid ()); return m_base + m_size; }
 
-  const_iterator begin () const { return m_base; }
-  const_iterator end () const { return m_base + m_size; }
+  const_iterator begin () const { gcc_checking_assert (is_valid ()); return m_base; }
+  const_iterator end () const { gcc_checking_assert (is_valid ()); return m_base + m_size; }
 
   value_type &front ();
   value_type &back ();
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 946bf13..66feed5 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,37 @@
+2025-04-19  Jiaxun Yang  <jiaxun.yang@flygoat.com>
+
+	PR target/118257
+	* config/sh/sfp-machine.h (_FPU_GETCW): Implement with builtin.
+	(_FPU_SETCW): Likewise.
+	(FP_EX_ENABLE_SHIFT): Derive from arch spec.
+	(FP_EX_CAUSE_SHIFT): Likewise.
+	(FP_RND_MASK): Likewise.
+	(FP_EX_INVALID): Likewise.
+	(FP_EX_DIVZERO): Likewise.
+	(FP_EX_ALL): Likewise.
+	(FP_EX_OVERFLOW): Likewise.
+	(FP_EX_UNDERFLOW): Likewise.
+	(FP_EX_INEXACT): Likewise.
+	(_FP_DECL_EX): Declare default FCSR value.
+	(FP_RND_NEAREST): Derive from arch spec.
+	(FP_RND_ZERO): Likewise.
+	(FP_INIT_ROUNDMODE): Likewise.
+	(FP_ROUNDMODE): Likewise.
+	(FP_TRAPPING_EXCEPTIONS): Likewise.
+	(FP_HANDLE_EXCEPTIONS): Implement with _FPU_SETCW.
+
+2025-04-19  Jiaxun Yang  <jiaxun.yang@flygoat.com>
+
+	PR target/111814
+	* config/sh/sfp-machine.h (_FP_NANFRAC_B): Reverse signaling bit.
+	(_FP_NANFRAC_H): Likewise.
+	(_FP_NANFRAC_S): Likewise.
+	(_FP_NANFRAC_D): Likewise.
+	(_FP_NANFRAC_Q): Likewise.
+	(_FP_KEEPNANFRACP): Enable for target.
+	(_FP_QNANNEGATEDP): Enable for target.
+	(_FP_CHOOSENAN): Port from MIPS.
+
 2025-04-14  Thomas Schwinge  <tschwinge@baylibre.com>
 
 	PR target/118794
diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h
index 66984d4..8030c80 100644
--- a/libgcc/config/sh/sfp-machine.h
+++ b/libgcc/config/sh/sfp-machine.h
@@ -39,11 +39,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define _FP_DIV_MEAT_D(R,X,Y)	_FP_DIV_MEAT_2_udiv(D,R,X,Y)
 #define _FP_DIV_MEAT_Q(R,X,Y)	_FP_DIV_MEAT_4_udiv(Q,R,X,Y)
 
-#define _FP_NANFRAC_B	_FP_QNANBIT_B
-#define _FP_NANFRAC_H	_FP_QNANBIT_H
-#define _FP_NANFRAC_S	_FP_QNANBIT_S
-#define _FP_NANFRAC_D	_FP_QNANBIT_D, 0
-#define _FP_NANFRAC_Q	_FP_QNANBIT_Q, 0, 0, 0
+#define _FP_NANFRAC_B	(_FP_QNANBIT_B - 1)
+#define _FP_NANFRAC_H	(_FP_QNANBIT_H - 1)
+#define _FP_NANFRAC_S	(_FP_QNANBIT_S - 1)
+#define _FP_NANFRAC_D	(_FP_QNANBIT_D - 1), -1
+#define _FP_NANFRAC_Q	(_FP_QNANBIT_Q - 1), -1, -1, -1
 
 /* The type of the result of a floating point comparison.  This must
    match __libgcc_cmp_return__ in GCC for the target.  */
@@ -56,15 +56,71 @@ typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
 #define _FP_NANSIGN_D	0
 #define _FP_NANSIGN_Q	0
 
-#define _FP_KEEPNANFRACP 0
-#define _FP_QNANNEGATEDP 0
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 1
+
+/* X is chosen unless one of the NaNs is sNaN.  */
+# define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+  do {								\
+    if ((_FP_FRAC_HIGH_RAW_##fs(X) |				\
+	 _FP_FRAC_HIGH_RAW_##fs(Y)) & _FP_QNANBIT_##fs)		\
+      {								\
+	R##_s = _FP_NANSIGN_##fs;				\
+	_FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);			\
+      }								\
+    else							\
+      {								\
+	R##_s = X##_s;						\
+	_FP_FRAC_COPY_##wc(R,X);				\
+      }								\
+    R##_c = FP_CLS_NAN;						\
+  } while (0)
+
+#ifdef __SH_FPU_ANY__
+#define _FPU_GETCW(fpscr) fpscr = __builtin_sh_get_fpscr ()
+#define _FPU_SETCW(fpscr) __builtin_sh_set_fpscr (fpscr)
+#define FP_EX_ENABLE_SHIFT 5
+#define FP_EX_CAUSE_SHIFT	10
 
-#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)  \
-  do {                      \
-    R##_s = _FP_NANSIGN_##fs;           \
-    _FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);  \
-    R##_c = FP_CLS_NAN;             \
+#define	FP_EX_INVALID	0x0040
+#define	FP_EX_DIVZERO	0x0020
+#if defined (__SH2E__)
+#define	FP_EX_ALL	(FP_EX_DIVZERO | FP_EX_INVALID)
+#else
+#define	FP_EX_OVERFLOW	0x0010
+#define	FP_EX_UNDERFLOW	0x0008
+#define	FP_EX_INEXACT	0x0004
+#define	FP_EX_ALL	(FP_EX_DIVZERO | FP_EX_INEXACT | \
+    FP_EX_INVALID | FP_EX_OVERFLOW | FP_EX_UNDERFLOW)
+#endif
+#define _FP_DECL_EX \
+  unsigned int _fcsr __attribute__ ((unused)) = FP_RND_NEAREST
+/* Rounding modes.  */
+#define	FP_RND_NEAREST  0x0
+#define	FP_RND_ZERO     0x1
+/* Placeholder, hardware does not have PINF/MINF modes.  */
+#define FP_RND_PINF     0x2
+#define FP_RND_MINF     0x3
+#define FP_RND_MASK     3
+
+#define FP_INIT_ROUNDMODE _FPU_GETCW (_fcsr)
+#define FP_ROUNDMODE (_fcsr & FP_RND_MASK)
+#define FP_TRAPPING_EXCEPTIONS ((_fcsr >> FP_EX_ENABLE_SHIFT) & FP_EX_ALL)
+#define FP_HANDLE_EXCEPTIONS				\
+  do {							\
+    _fcsr &= ~(FP_EX_ALL << FP_EX_CAUSE_SHIFT);		\
+    _fcsr |= _fex | (_fex << FP_EX_CAUSE_SHIFT);	\
+    _FPU_SETCW (_fcsr);			\
   } while (0)
+#else
+#define FP_EX_INVALID (1 << 4)
+#define FP_EX_DIVZERO (1 << 3)
+#if !defined (__SH2E__)
+#define FP_EX_OVERFLOW (1 << 2)
+#define FP_EX_UNDERFLOW (1 << 1)
+#define FP_EX_INEXACT (1 << 0)
+#endif
+#endif
 
 #define _FP_TININESS_AFTER_ROUNDING 1
 
diff --git a/libgcobol/ChangeLog b/libgcobol/ChangeLog
index 6a0e961..9de1714 100644
--- a/libgcobol/ChangeLog
+++ b/libgcobol/ChangeLog
@@ -1,3 +1,10 @@
+2025-04-21  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
+
+	* configure.ac: Check for struct tm.tm_zone.
+	* configure, config.h.in: Regenerate.
+	* intrinsic.cc (__gg__formatted_current_date): Guard tm.tm_zone
+	use with HAVE_STRUCT_TM_TM_ZONE.
+
 2025-04-15  Andreas Schwab  <schwab@suse.de>
 
 	* configure.tgt: Set LIBGCOBOL_SUPPORTED for riscv64-*-linux* with
diff --git a/libgcobol/config.h.in b/libgcobol/config.h.in
index 6a53279..fdf5e3e 100644
--- a/libgcobol/config.h.in
+++ b/libgcobol/config.h.in
@@ -72,6 +72,9 @@
 /* Define to 1 if you have the `strtof128' function. */
 #undef HAVE_STRTOF128
 
+/* Define to 1 if `tm_zone' is a member of `struct tm'. */
+#undef HAVE_STRUCT_TM_TM_ZONE
+
 /* Define to 1 if you have the <sys/stat.h> header file. */
 #undef HAVE_SYS_STAT_H
 
diff --git a/libgcobol/configure b/libgcobol/configure
index e83119d..6821591 100755
--- a/libgcobol/configure
+++ b/libgcobol/configure
@@ -2449,6 +2449,63 @@ $as_echo "$ac_res" >&6; }
   eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
 
 } # ac_fn_cxx_check_func
+
+# ac_fn_cxx_check_member LINENO AGGR MEMBER VAR INCLUDES
+# ------------------------------------------------------
+# Tries to find if the field MEMBER exists in type AGGR, after including
+# INCLUDES, setting cache variable VAR accordingly.
+ac_fn_cxx_check_member ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
+$as_echo_n "checking for $2.$3... " >&6; }
+if eval \${$4+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (ac_aggr.$3)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  eval "$4=yes"
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$5
+int
+main ()
+{
+static $2 ac_aggr;
+if (sizeof ac_aggr.$3)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"; then :
+  eval "$4=yes"
+else
+  eval "$4=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+eval ac_res=\$$4
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_cxx_check_member
 cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
@@ -11693,7 +11750,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11696 "configure"
+#line 11753 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11799,7 +11856,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11802 "configure"
+#line 11859 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17434,6 +17491,19 @@ $as_echo "#define USE_IEC_60559 1" >>confdefs.h
 
 
 
+# struct tm tm_zone is a POSIX.1-2024 addition.
+ac_fn_cxx_check_member "$LINENO" "struct tm" "tm_zone" "ac_cv_member_struct_tm_tm_zone" "#include <time.h>
+"
+if test "x$ac_cv_member_struct_tm_tm_zone" = xyes; then :
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_STRUCT_TM_TM_ZONE 1
+_ACEOF
+
+
+fi
+
+
 if test "${multilib}" = "yes"; then
   multilib_arg="--enable-multilib"
 else
diff --git a/libgcobol/configure.ac b/libgcobol/configure.ac
index a1e9513..4bb6905 100644
--- a/libgcobol/configure.ac
+++ b/libgcobol/configure.ac
@@ -231,6 +231,9 @@ elif test "${ENABLE_LIBQUADMATH_SUPPORT}" = "default" ; then
 fi
 LIBGCOBOL_CHECK_FLOAT128
 
+# struct tm tm_zone is a POSIX.1-2024 addition.
+AC_CHECK_MEMBERS([struct tm.tm_zone],,,[#include <time.h>])
+
 if test "${multilib}" = "yes"; then
   multilib_arg="--enable-multilib"
 else
diff --git a/libgcobol/intrinsic.cc b/libgcobol/intrinsic.cc
index 181b053..97f2bdc 100644
--- a/libgcobol/intrinsic.cc
+++ b/libgcobol/intrinsic.cc
@@ -1482,7 +1482,9 @@ __gg__formatted_current_date( cblc_field_t *dest, // Destination string
   __gg__clock_gettime(CLOCK_REALTIME, &ts);
 
   struct tm tm = {};
+#ifdef HAVE_STRUCT_TM_TM_ZONE
   tm.tm_zone = "GMT";
+#endif
   if( is_zulu )
     {
     gmtime_r(&ts.tv_sec, &tm);