Diffstat (limited to 'gcc')
29 files changed, 677 insertions, 107 deletions
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h index d587d25..743cc38 100644 --- a/gcc/config/i386/cygming.h +++ b/gcc/config/i386/cygming.h @@ -28,16 +28,15 @@ along with GCC; see the file COPYING3. If not see #undef TARGET_SEH #define TARGET_SEH (TARGET_64BIT_MS_ABI && flag_unwind_tables) +#undef PREFERRED_STACK_BOUNDARY_DEFAULT +#define PREFERRED_STACK_BOUNDARY_DEFAULT \ + (TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY) + /* Win64 with SEH cannot represent DRAP stack frames. Disable its use. Force the use of different mechanisms to allocate aligned local data. */ #undef MAX_STACK_ALIGNMENT #define MAX_STACK_ALIGNMENT (TARGET_SEH ? 128 : MAX_OFILE_ALIGNMENT) -/* 32-bit Windows aligns the stack on a 4-byte boundary but SSE instructions - may require 16-byte alignment. */ -#undef STACK_REALIGN_DEFAULT -#define STACK_REALIGN_DEFAULT (TARGET_64BIT ? 0 : 1) - /* Support hooks for SEH. */ #undef TARGET_ASM_UNWIND_EMIT #define TARGET_ASM_UNWIND_EMIT i386_pe_seh_unwind_emit diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 1ba5ac4..54b3f6d 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -518,15 +518,17 @@ scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed) instead of using a scalar one. */ int -general_scalar_chain::vector_const_cost (rtx exp) +general_scalar_chain::vector_const_cost (rtx exp, basic_block bb) { gcc_assert (CONST_INT_P (exp)); if (standard_sse_constant_p (exp, vmode)) return ix86_cost->sse_op; + if (optimize_bb_for_size_p (bb)) + return COSTS_N_BYTES (8); /* We have separate costs for SImode and DImode, use SImode costs for smaller modes. */ - return ix86_cost->sse_load[smode == DImode ? 1 : 0]; + return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2; } /* Compute a gain for chain conversion. */ @@ -547,7 +549,7 @@ general_scalar_chain::compute_convert_gain () smaller modes than SImode the int load/store costs need to be adjusted as well. */ unsigned sse_cost_idx = smode == DImode ? 1 : 0; - unsigned m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; + int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1; EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) { @@ -555,26 +557,55 @@ general_scalar_chain::compute_convert_gain () rtx def_set = single_set (insn); rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); + basic_block bb = BLOCK_FOR_INSN (insn); int igain = 0; if (REG_P (src) && REG_P (dst)) - igain += 2 * m - ix86_cost->xmm_move; + { + if (optimize_bb_for_size_p (bb)) + /* reg-reg move is 2 bytes, while SSE 3. */ + igain += COSTS_N_BYTES (2 * m - 3); + else + /* Move costs are normalized to reg-reg move having cost 2. */ + igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2; + } else if (REG_P (src) && MEM_P (dst)) - igain - += m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]; + { + if (optimize_bb_for_size_p (bb)) + /* Integer load/store is 3+ bytes and SSE 4+. */ + igain += COSTS_N_BYTES (3 * m - 4); + else + igain + += COSTS_N_INSNS (m * ix86_cost->int_store[2] + - ix86_cost->sse_store[sse_cost_idx]) / 2; + } else if (MEM_P (src) && REG_P (dst)) - igain += m * ix86_cost->int_load[2] - ix86_cost->sse_load[sse_cost_idx]; + { + if (optimize_bb_for_size_p (bb)) + igain += COSTS_N_BYTES (3 * m - 4); + else + igain += COSTS_N_INSNS (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; + } else { /* For operations on memory operands, include the overhead of explicit load and store instructions. 
*/ if (MEM_P (dst)) - igain += optimize_insn_for_size_p () - ? -COSTS_N_BYTES (8) - : (m * (ix86_cost->int_load[2] - + ix86_cost->int_store[2]) - - (ix86_cost->sse_load[sse_cost_idx] + - ix86_cost->sse_store[sse_cost_idx])); + { + if (optimize_bb_for_size_p (bb)) + /* ??? This probably should account size difference + of SSE and integer load rather than full SSE load. */ + igain -= COSTS_N_BYTES (8); + else + { + int cost = (m * (ix86_cost->int_load[2] + + ix86_cost->int_store[2]) + - (ix86_cost->sse_load[sse_cost_idx] + + ix86_cost->sse_store[sse_cost_idx])); + igain += COSTS_N_INSNS (cost) / 2; + } + } switch (GET_CODE (src)) { @@ -595,7 +626,7 @@ general_scalar_chain::compute_convert_gain () igain += ix86_cost->shift_const - ix86_cost->sse_op; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); break; case ROTATE: @@ -631,16 +662,17 @@ general_scalar_chain::compute_convert_gain () igain += m * ix86_cost->add; if (CONST_INT_P (XEXP (src, 0))) - igain -= vector_const_cost (XEXP (src, 0)); + igain -= vector_const_cost (XEXP (src, 0), bb); if (CONST_INT_P (XEXP (src, 1))) - igain -= vector_const_cost (XEXP (src, 1)); + igain -= vector_const_cost (XEXP (src, 1), bb); if (MEM_P (XEXP (src, 1))) { - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) igain -= COSTS_N_BYTES (m == 2 ? 3 : 5); else - igain += m * ix86_cost->int_load[2] - - ix86_cost->sse_load[sse_cost_idx]; + igain += COSTS_N_INSNS + (m * ix86_cost->int_load[2] + - ix86_cost->sse_load[sse_cost_idx]) / 2; } break; @@ -698,7 +730,7 @@ general_scalar_chain::compute_convert_gain () case CONST_INT: if (REG_P (dst)) { - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) { /* xor (2 bytes) vs. xorps (3 bytes). */ if (src == const0_rtx) @@ -722,14 +754,14 @@ general_scalar_chain::compute_convert_gain () /* DImode can be immediate for TARGET_64BIT and SImode always. */ igain += m * COSTS_N_INSNS (1); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } } else if (MEM_P (dst)) { igain += (m * ix86_cost->int_store[2] - ix86_cost->sse_store[sse_cost_idx]); - igain -= vector_const_cost (src); + igain -= vector_const_cost (src, bb); } break; @@ -737,13 +769,14 @@ general_scalar_chain::compute_convert_gain () if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx) { // movd (4 bytes) replaced with movdqa (4 bytes). - if (!optimize_insn_for_size_p ()) - igain += ix86_cost->sse_to_integer - ix86_cost->xmm_move; + if (!optimize_bb_for_size_p (bb)) + igain += COSTS_N_INSNS (ix86_cost->sse_to_integer + - ix86_cost->xmm_move) / 2; } else { // pshufd; movd replaced with pshufd. - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) igain += COSTS_N_BYTES (4); else igain += ix86_cost->sse_to_integer; @@ -769,11 +802,11 @@ general_scalar_chain::compute_convert_gain () /* Cost the integer to sse and sse to integer moves. */ if (!optimize_function_for_size_p (cfun)) { - cost += n_sse_to_integer * ix86_cost->sse_to_integer; + cost += n_sse_to_integer * COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2; /* ??? integer_to_sse but we only have that in the RA cost table. Assume sse_to_integer/integer_to_sse are the same which they are at the moment. 
*/ - cost += n_integer_to_sse * ix86_cost->sse_to_integer; + cost += n_integer_to_sse * COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2; } else if (TARGET_64BIT || smode == SImode) { @@ -1508,13 +1541,13 @@ general_scalar_chain::convert_insn (rtx_insn *insn) with numerous special cases. */ static int -timode_immed_const_gain (rtx cst) +timode_immed_const_gain (rtx cst, basic_block bb) { /* movabsq vs. movabsq+vmovq+vunpacklqdq. */ if (CONST_WIDE_INT_P (cst) && CONST_WIDE_INT_NUNITS (cst) == 2 && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1)) - return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9) + return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9) : -COSTS_N_INSNS (2); /* 2x movabsq ~ vmovdqa. */ return 0; @@ -1546,33 +1579,34 @@ timode_scalar_chain::compute_convert_gain () rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); HOST_WIDE_INT op1val; + basic_block bb = BLOCK_FOR_INSN (insn); int scost, vcost; int igain = 0; switch (GET_CODE (src)) { case REG: - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3); else igain = COSTS_N_INSNS (1); break; case MEM: - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (7) + igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1); break; case CONST_INT: if (MEM_P (dst) && standard_sse_constant_p (src, V1TImode)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1; + igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (11) : 1; break; case CONST_WIDE_INT: /* 2 x mov vs. vmovdqa. */ if (MEM_P (dst)) - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3) + igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1); break; @@ -1587,14 +1621,14 @@ timode_scalar_chain::compute_convert_gain () if (!MEM_P (dst)) igain = COSTS_N_INSNS (1); if (CONST_SCALAR_INT_P (XEXP (src, 1))) - igain += timode_immed_const_gain (XEXP (src, 1)); + igain += timode_immed_const_gain (XEXP (src, 1), bb); break; case ASHIFT: case LSHIFTRT: /* See ix86_expand_v1ti_shift. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) { if (op1val == 64 || op1val == 65) scost = COSTS_N_BYTES (5); @@ -1628,7 +1662,7 @@ timode_scalar_chain::compute_convert_gain () case ASHIFTRT: /* See ix86_expand_v1ti_ashiftrt. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) { if (op1val == 64 || op1val == 127) scost = COSTS_N_BYTES (7); @@ -1706,7 +1740,7 @@ timode_scalar_chain::compute_convert_gain () case ROTATERT: /* See ix86_expand_v1ti_rotate. */ op1val = INTVAL (XEXP (src, 1)); - if (optimize_insn_for_size_p ()) + if (optimize_bb_for_size_p (bb)) { scost = COSTS_N_BYTES (13); if ((op1val & 31) == 0) @@ -1738,16 +1772,16 @@ timode_scalar_chain::compute_convert_gain () { if (GET_CODE (XEXP (src, 0)) == AND) /* and;and;or (9 bytes) vs. ptest (5 bytes). */ - igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (2); + igain = optimize_bb_for_size_p (bb) ? COSTS_N_BYTES (4) + : COSTS_N_INSNS (2); /* or (3 bytes) vs. ptest (5 bytes). */ - else if (optimize_insn_for_size_p ()) + else if (optimize_bb_for_size_p (bb)) igain = -COSTS_N_BYTES (2); } else if (XEXP (src, 1) == const1_rtx) /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ - igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6) - : -COSTS_N_INSNS (1); + igain = optimize_bb_for_size_p (bb) ? 
-COSTS_N_BYTES (6) + : -COSTS_N_INSNS (1); break; default: diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h index 24b0c4e..7f7c0f7 100644 --- a/gcc/config/i386/i386-features.h +++ b/gcc/config/i386/i386-features.h @@ -188,7 +188,7 @@ class general_scalar_chain : public scalar_chain private: void convert_insn (rtx_insn *insn) final override; - int vector_const_cost (rtx exp); + int vector_const_cost (rtx exp, basic_block bb); rtx convert_rotate (enum rtx_code, rtx op0, rtx op1, rtx_insn *insn); }; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index fd36ea8..9c24a92 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -7942,6 +7942,15 @@ ix86_update_stack_boundary (void) if (ix86_tls_descriptor_calls_expanded_in_cfun && crtl->preferred_stack_boundary < 128) crtl->preferred_stack_boundary = 128; + + /* For 32-bit MS ABI, both the incoming and preferred stack boundaries + are 32 bits, but if force_align_arg_pointer is specified, it should + prefer 128 bits for a backward-compatibility reason, which is also + what the doc suggests. */ + if (lookup_attribute ("force_align_arg_pointer", + TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) + && crtl->preferred_stack_boundary < 128) + crtl->preferred_stack_boundary = 128; } /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 6a38de3..18fa97a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -179,6 +179,7 @@ struct processor_costs { const int xmm_move, ymm_move, /* cost of moving XMM and YMM register. */ zmm_move; const int sse_to_integer; /* cost of moving SSE register to integer. */ + const int integer_to_sse; /* cost of moving integer register to SSE. */ const int gather_static, gather_per_elt; /* Cost of gather load is computed as static + per_item * nelts. */ const int scatter_static, scatter_per_elt; /* Cost of gather store is diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index 6cce70a..e509129 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -107,6 +107,7 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ in 128bit, 256bit and 512bit */ 4, 4, 6, /* cost of moving XMM,YMM,ZMM register */ 4, /* cost of moving SSE register to integer. */ + 4, /* cost of moving integer register to SSE. */ COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */ COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -227,6 +228,7 @@ struct processor_costs i386_cost = { /* 386 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ @@ -345,6 +347,7 @@ struct processor_costs i486_cost = { /* 486 specific costs */ {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 4, /* size of l1 cache. 486 has 8kB cache @@ -465,6 +468,7 @@ struct processor_costs pentium_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. 
*/ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -576,6 +580,7 @@ struct processor_costs lakemont_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -702,6 +707,7 @@ struct processor_costs pentiumpro_cost = { {4, 8, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -819,6 +825,7 @@ struct processor_costs geode_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -936,6 +943,7 @@ struct processor_costs k6_cost = { {2, 2, 8, 16, 32}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 2, 2, /* Gather load static, per_elt. */ 2, 2, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -1059,6 +1067,7 @@ struct processor_costs athlon_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1184,6 +1193,7 @@ struct processor_costs k8_cost = { {4, 4, 10, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 5, /* cost of moving SSE register to integer. */ + 5, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1322,6 +1332,7 @@ struct processor_costs amdfam10_cost = { {4, 4, 5, 10, 20}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 3, /* cost of moving SSE register to integer. */ + 3, /* cost of moving integer register to SSE. */ 4, 4, /* Gather load static, per_elt. */ 4, 4, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -1452,6 +1463,7 @@ const struct processor_costs bdver_cost = { {10, 10, 10, 40, 60}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 16, /* cost of moving SSE register to integer. */ + 16, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 16, /* size of l1 cache. */ @@ -1603,6 +1615,7 @@ struct processor_costs znver1_cost = { {8, 8, 8, 16, 32}, /* cost of unaligned stores. */ 2, 3, 6, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. 
*/ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1770,6 +1783,7 @@ struct processor_costs znver2_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 23 uops and throughput is 9, VGATHERDPD is 35 uops, throughput 12. Approx 9 uops do not depend on vector size and every load is 7 uops. */ @@ -1912,6 +1926,7 @@ struct processor_costs znver3_cost = { 2, 2, 3, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 15 uops and throughput is 4, VGATHERDPS is 23 uops, throughput 9. Approx 7 uops do not depend on vector size and every load is 4 uops. */ @@ -2056,6 +2071,7 @@ struct processor_costs znver4_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops, throughput 5. Approx 7 uops do not depend on vector size and every load is 5 uops. */ @@ -2204,6 +2220,7 @@ struct processor_costs znver5_cost = { 2, 2, 2, /* cost of moving XMM,YMM,ZMM register. */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ /* TODO: gather and scatter instructions are currently disabled in x86-tune.def. In some cases they are however a win, see PR116582 @@ -2372,6 +2389,7 @@ struct processor_costs skylake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2508,6 +2526,7 @@ struct processor_costs icelake_cost = { {8, 8, 8, 8, 16}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 20, 8, /* Gather load static, per_elt. */ 22, 10, /* Gather store static, per_elt. */ 64, /* size of l1 cache. */ @@ -2638,6 +2657,7 @@ struct processor_costs alderlake_cost = { {8, 8, 8, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2761,6 +2781,7 @@ const struct processor_costs btver1_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -2881,6 +2902,7 @@ const struct processor_costs btver2_cost = { {10, 10, 12, 48, 96}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 14, /* cost of moving SSE register to integer. */ + 14, /* cost of moving integer register to SSE. */ 10, 10, /* Gather load static, per_elt. */ 10, 10, /* Gather store static, per_elt. */ 32, /* size of l1 cache. 
*/ @@ -3000,6 +3022,7 @@ struct processor_costs pentium4_cost = { {32, 32, 32, 64, 128}, /* cost of unaligned stores. */ 12, 24, 48, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 16, 16, /* Gather load static, per_elt. */ 16, 16, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -3122,6 +3145,7 @@ struct processor_costs nocona_cost = { {24, 24, 24, 48, 96}, /* cost of unaligned stores. */ 6, 12, 24, /* cost of moving XMM,YMM,ZMM register */ 20, /* cost of moving SSE register to integer. */ + 20, /* cost of moving integer register to SSE. */ 12, 12, /* Gather load static, per_elt. */ 12, 12, /* Gather store static, per_elt. */ 8, /* size of l1 cache. */ @@ -3242,6 +3266,7 @@ struct processor_costs atom_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3362,6 +3387,7 @@ struct processor_costs slm_cost = { {16, 16, 16, 32, 64}, /* cost of unaligned stores. */ 2, 4, 8, /* cost of moving XMM,YMM,ZMM register */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 8, 8, /* Gather load static, per_elt. */ 8, 8, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3494,6 +3520,7 @@ struct processor_costs tremont_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3616,6 +3643,7 @@ struct processor_costs intel_cost = { {10, 10, 10, 10, 10}, /* cost of unaligned loads. */ 2, 2, 2, /* cost of moving XMM,YMM,ZMM register */ 4, /* cost of moving SSE register to integer. */ + 4, /* cost of moving integer register to SSE. */ 6, 6, /* Gather load static, per_elt. */ 6, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3731,15 +3759,16 @@ struct processor_costs lujiazui_cost = { {6, 6, 6}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ - {6, 6, 6}, /* cost of storing integer registers. */ + {6, 6, 6}, /* cost of storing integer registers. */ {6, 6, 6, 10, 15}, /* cost of loading SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. */ {6, 6, 6, 10, 15}, /* cost of storing SSE register - in 32bit, 64bit, 128bit, 256bit and 512bit. */ + in 32bit, 64bit, 128bit, 256bit and 512bit. */ {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ - 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ - 6, /* cost of moving SSE register to integer. */ + 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ + 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3864,6 +3893,7 @@ struct processor_costs yongfeng_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. 
*/ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -3987,6 +4017,7 @@ struct processor_costs shijidadao_cost = { {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ 8, /* cost of moving SSE register to integer. */ + 8, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -4116,6 +4147,7 @@ struct processor_costs generic_cost = { {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register */ 6, /* cost of moving SSE register to integer. */ + 6, /* cost of moving integer register to SSE. */ 18, 6, /* Gather load static, per_elt. */ 18, 6, /* Gather store static, per_elt. */ 32, /* size of l1 cache. */ @@ -4249,6 +4281,7 @@ struct processor_costs core_cost = { {6, 6, 6, 6, 12}, /* cost of unaligned stores. */ 2, 2, 4, /* cost of moving XMM,YMM,ZMM register */ 2, /* cost of moving SSE register to integer. */ + 2, /* cost of moving integer register to SSE. */ /* VGATHERDPD is 7 uops, rec throughput 5, while VGATHERDPD is 9 uops, rec. throughput 6. So 5 uops statically and one uops per load. */ diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 95df533..c226c39 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -1222,7 +1222,7 @@ we can't keep it in 64 bit variable.) then use clmul instruction to implement the CRC, otherwise (TARGET_ZBKB) generate table based using brev. */ - if ((TARGET_ZBKC || TARGET_ZBC) && <ANYI:MODE>mode < word_mode) + if ((TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) && <ANYI:MODE>mode < word_mode) expand_reversed_crc_using_clmul (<ANYI:MODE>mode, <ANYI1:MODE>mode, operands); else if (TARGET_ZBKB) @@ -1254,7 +1254,8 @@ (match_operand:SUBX 3)] UNSPEC_CRC))] /* We don't support the case when data's size is bigger than CRC's size. */ - "(TARGET_ZBKC || TARGET_ZBC) && <SUBX:MODE>mode >= <SUBX1:MODE>mode" + "(TARGET_ZBKC || TARGET_ZBC || TARGET_ZVBC) + && <SUBX:MODE>mode >= <SUBX1:MODE>mode" { /* If we have the ZBC or ZBKC extension (ie, clmul) and it is possible to store the quotient within a single variable diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md index 214c20b..584b345 100644 --- a/gcc/config/riscv/iterators.md +++ b/gcc/config/riscv/iterators.md @@ -262,6 +262,9 @@ (define_code_attr fix_uns [(fix "fix") (unsigned_fix "fixuns")]) +(define_code_attr OPTAB [(ior "IOR") + (xor "XOR")]) + ;; ------------------------------------------------------------------- ;; Code Attributes diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index c9a638c..23690792 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -380,14 +380,6 @@ (and (match_code "const_int") (match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))"))) -;; Register, small constant or single bit constant for use in -;; bseti/binvi. 
-(define_predicate "arith_or_zbs_operand" - (ior (match_operand 0 "const_arith_operand") - (match_operand 0 "register_operand") - (and (match_test "TARGET_ZBS") - (match_operand 0 "single_bit_mask_operand")))) - (define_predicate "not_single_bit_mask_operand" (and (match_code "const_int") (match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))"))) @@ -689,3 +681,7 @@ (define_predicate "bitpos_mask_operand" (and (match_code "const_int") (match_test "TARGET_64BIT ? INTVAL (op) == 63 : INTVAL (op) == 31"))) + +(define_predicate "reg_or_const_int_operand" + (ior (match_operand 0 "const_int_operand") + (match_operand 0 "register_operand"))) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index b0d5bbb..271a9a3 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -140,6 +140,7 @@ extern void riscv_expand_sssub (rtx, rtx, rtx); extern void riscv_expand_ustrunc (rtx, rtx); extern void riscv_expand_sstrunc (rtx, rtx); extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); +extern bool synthesize_ior_xor (rtx_code, rtx [3]); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 3ee88db..8b77a35 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -14035,17 +14035,53 @@ expand_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode, rtx data = gen_rtx_ZERO_EXTEND (word_mode, operands[2]); riscv_expand_op (XOR, word_mode, a0, crc, data); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, t0)); - else - emit_insn (gen_riscv_clmul_si (a0, a0, t0)); + if (TARGET_ZBKC || TARGET_ZBC) + { + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t0)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t0)); - riscv_expand_op (LSHIFTRT, word_mode, a0, a0, - gen_int_mode (crc_size, word_mode)); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, t1)); + riscv_expand_op (LSHIFTRT, word_mode, a0, a0, + gen_int_mode (crc_size, word_mode)); + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t1)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t1)); + } else - emit_insn (gen_riscv_clmul_si (a0, a0, t1)); + { + machine_mode vmode; + if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode)) + gcc_unreachable (); + + rtx vec = gen_reg_rtx (vmode); + + insn_code icode1 = code_for_pred_broadcast (vmode); + rtx ops1[] = {vec, a0}; + emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode)); + + rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0); + insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL, + E_RVVM1DImode); + rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0}; + emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX + (Pmode)); + + rtx shift_amount = gen_int_mode (data_size, Pmode); + insn_code icode3 = code_for_pred_scalar (LSHIFTRT, vmode); + rtx ops3[] = {vec, vec, shift_amount}; + emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode)); + + insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH, + E_RVVM1DImode); + rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1}; + emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX + (Pmode)); + + rtx vec_low_lane = gen_lowpart (DImode, vec); + riscv_emit_move (a0, vec_low_lane); + } if (crc_size > data_size) { @@ -14094,19 +14130,53 @@ expand_reversed_crc_using_clmul (scalar_mode crc_mode, scalar_mode data_mode, rtx a0 = gen_reg_rtx (word_mode); 
riscv_expand_op (XOR, word_mode, a0, crc, data); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmul_di (a0, a0, t0)); - else - emit_insn (gen_riscv_clmul_si (a0, a0, t0)); + if (TARGET_ZBKC || TARGET_ZBC) + { + if (TARGET_64BIT) + emit_insn (gen_riscv_clmul_di (a0, a0, t0)); + else + emit_insn (gen_riscv_clmul_si (a0, a0, t0)); - rtx num_shift = gen_int_mode (GET_MODE_BITSIZE (word_mode) - data_size, - word_mode); - riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift); + rtx num_shift = gen_int_mode (BITS_PER_WORD - data_size, word_mode); + riscv_expand_op (ASHIFT, word_mode, a0, a0, num_shift); - if (TARGET_64BIT) - emit_insn (gen_riscv_clmulh_di (a0, a0, t1)); + if (TARGET_64BIT) + emit_insn (gen_riscv_clmulh_di (a0, a0, t1)); + else + emit_insn (gen_riscv_clmulh_si (a0, a0, t1)); + } else - emit_insn (gen_riscv_clmulh_si (a0, a0, t1)); + { + machine_mode vmode; + if (!riscv_vector::get_vector_mode (DImode, 1).exists (&vmode)) + gcc_unreachable (); + + rtx vec = gen_reg_rtx (vmode); + insn_code icode1 = code_for_pred_broadcast (vmode); + rtx ops1[] = {vec, a0}; + emit_nonvlmax_insn (icode1, UNARY_OP, ops1, CONST1_RTX (Pmode)); + + rtx rvv1di_reg = gen_rtx_SUBREG (RVVM1DImode, vec, 0); + insn_code icode2 = code_for_pred_vclmul_scalar (UNSPEC_VCLMUL, + E_RVVM1DImode); + rtx ops2[] = {rvv1di_reg, rvv1di_reg, t0}; + emit_nonvlmax_insn (icode2, riscv_vector::BINARY_OP, ops2, CONST1_RTX + (Pmode)); + + rtx shift_amount = gen_int_mode (BITS_PER_WORD - data_size, Pmode); + insn_code icode3 = code_for_pred_scalar (ASHIFT, vmode); + rtx ops3[] = {vec, vec, shift_amount}; + emit_nonvlmax_insn (icode3, BINARY_OP, ops3, CONST1_RTX (Pmode)); + + insn_code icode4 = code_for_pred_vclmul_scalar (UNSPEC_VCLMULH, + E_RVVM1DImode); + rtx ops4[] = {rvv1di_reg, rvv1di_reg, t1}; + emit_nonvlmax_insn (icode4, riscv_vector::BINARY_OP, ops4, CONST1_RTX + (Pmode)); + + rtx vec_low_lane = gen_lowpart (DImode, vec); + riscv_emit_move (a0, vec_low_lane); + } if (crc_size > data_size) { @@ -14140,6 +14210,205 @@ bool need_shadow_stack_push_pop_p () return is_zicfiss_p () && riscv_save_return_addr_reg_p (); } +/* Synthesize OPERANDS[0] = OPERANDS[1] CODE OPERANDS[2]. + + OPERANDS[0] and OPERANDS[1] will be a REG and may be the same + REG. + + OPERANDS[2] is a CONST_INT. + + CODE is IOR or XOR. + + Return TRUE if the operation was fully synthesized and the caller + need not generate additional code. Return FALSE if the operation + was not synthesized and the caller is responsible for emitting the + proper sequence. */ + +bool +synthesize_ior_xor (rtx_code code, rtx operands[3]) +{ + /* Trivial cases that don't need synthesis. */ + if (SMALL_OPERAND (INTVAL (operands[2])) + || ((TARGET_ZBS || TARGET_XTHEADBS || TARGET_ZBKB) + && single_bit_mask_operand (operands[2], word_mode))) + return false; + + /* The number of instructions to synthesize the constant is a good + estimate of the budget. That does not account for out-of-order + execution and fusion in the constant synthesis, which would naturally + decrease the budget. It also does not account for the IOR/XOR at + the end of the sequence which would increase the budget. */ + int budget = (TARGET_ZBS ? riscv_const_insns (operands[2], true) : -1); + int original_budget = budget; + + /* Bits we need to set in operands[0]. As we synthesize the operation, + we clear bits in IVAL. Once IVAL is zero, then synthesis of the + operation is complete. */ + unsigned HOST_WIDE_INT ival = INTVAL (operands[2]); + + /* Check if we want to use [x]ori.
Then get the remaining bits + and decrease the budget by one. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + ival &= ~HOST_WIDE_INT_UC (0x7ff); + budget--; + } + + /* Check for bseti cases. For each remaining bit in ival, + decrease the budget by one. */ + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + ival &= ~tmpval; + budget--; + } + + /* If we're flipping all but a small number of bits we can pre-flip + the outliers, then flip all the bits, which would restore those + bits that were pre-flipped. */ + if ((TARGET_ZBS || TARGET_XTHEADBS || TARGET_ZBKB) + && budget < 0 + && code == XOR + && popcount_hwi (~INTVAL (operands[2])) < original_budget) + { + /* Pre-flipping bits we want to preserve. */ + rtx input = operands[1]; + ival = ~INTVAL (operands[2]); + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (tmpval); + x = gen_rtx_XOR (word_mode, input, x); + emit_insn (gen_rtx_SET (operands[0], x)); + input = operands[0]; + ival &= ~tmpval; + } + + /* Now flip all the bits, which restores the bits we were + preserving. */ + rtx x = gen_rtx_NOT (word_mode, input); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* One more approach we can try. If our budget is 3+ instructions, + then we can try to rotate the source so that the bits we want to + set are in the low 11 bits. We then use [x]ori to set those low + bits, then rotate things back into their proper place. */ + if ((TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB) + && budget < 0 + && popcount_hwi (INTVAL (operands[2])) <= 11 + && riscv_const_insns (operands[2], true) >= 3) + { + ival = INTVAL (operands[2]); + /* First see if the constant trivially fits into 11 bits in the LSB. */ + int lsb = ctz_hwi (ival); + int msb = BITS_PER_WORD - 1 - clz_hwi (ival); + if (msb - lsb + 1 <= 11) + { + /* Rotate the source right by LSB bits. */ + rtx x = GEN_INT (lsb); + x = gen_rtx_ROTATERT (word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + + /* Shift the constant right by LSB bits. */ + x = GEN_INT (ival >> lsb); + + /* Perform the IOR/XOR operation. */ + x = gen_rtx_fmt_ee (code, word_mode, operands[0], x); + emit_insn (gen_rtx_SET (operands[0], x)); + + /* And rotate left to put everything back in place, we don't + have rotate left by a constant, so use rotate right by + an adjusted constant. */ + x = GEN_INT (BITS_PER_WORD - lsb); + x = gen_rtx_ROTATERT (word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Maybe the bits are split between the high and low parts + of the constant. A bit more complex, but still manageable. + + Conceptually we want to rotate left the constant by the number + of leading zeros after masking off all but the low 11 bits. */ + int rotcount = clz_hwi (ival & 0x7ff) - (BITS_PER_WORD - 11); + + /* Rotate the constant left by MSB bits. */ + ival = (ival << rotcount) | (ival >> (BITS_PER_WORD - rotcount)); + + /* Now we can do the same tests as before. */ + lsb = ctz_hwi (ival); + msb = BITS_PER_WORD - clz_hwi (ival); + if ((INTVAL (operands[2]) & HOST_WIDE_INT_UC (0x7ff)) != 0 + && msb - lsb + 1 <= 11) + { + /* Rotate the source left by ROTCOUNT bits, we don't have + rotate left by a constant, so use rotate right by an + adjusted constant. */ + rtx x = GEN_INT (BITS_PER_WORD - rotcount); + x = gen_rtx_ROTATERT (word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + + /* We've already rotated the constant. 
So perform the IOR/XOR + operation. */ + x = GEN_INT (ival); + x = gen_rtx_fmt_ee (code, word_mode, operands[0], x); + emit_insn (gen_rtx_SET (operands[0], x)); + + /* And rotate right to put everything into its proper place. */ + x = GEN_INT (rotcount); + x = gen_rtx_ROTATERT (word_mode, operands[0], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + } + + /* If after accounting for bseti the remaining budget has + gone to less than zero, it forces the value into a + register and performs the IOR operation. It returns + TRUE to the caller so the caller knows code generation + is complete. */ + if (budget < 0) + { + rtx x = force_reg (word_mode, operands[2]); + x = gen_rtx_fmt_ee (code, word_mode, operands[1], x); + emit_insn (gen_rtx_SET (operands[0], x)); + return true; + } + + /* Synthesis is better than loading the constant. */ + ival = INTVAL (operands[2]); + rtx input = operands[1]; + + /* Emit the [x]ori insn that sets the low 11 bits into + the proper state. */ + if ((ival & HOST_WIDE_INT_UC (0x7ff)) != 0) + { + rtx x = GEN_INT (ival & HOST_WIDE_INT_UC (0x7ff)); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + emit_insn (gen_rtx_SET (operands[0], x)); + input = operands[0]; + ival &= ~HOST_WIDE_INT_UC (0x7ff); + } + + /* We figure out a single bit as a constant and + generate a CONST_INT node for that. Then we + construct the IOR node, then the SET node and + emit it. An IOR with a suitable constant that is + a single bit will be implemented with a bseti. */ + while (ival) + { + HOST_WIDE_INT tmpval = HOST_WIDE_INT_UC (1) << ctz_hwi (ival); + rtx x = GEN_INT (tmpval); + x = gen_rtx_fmt_ee (code, word_mode, input, x); + emit_insn (gen_rtx_SET (operands[0], x)); + input = operands[0]; + ival &= ~tmpval; + } + return true; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 259997f..154b49d 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -1767,8 +1767,15 @@ (define_expand "<optab><mode>3" [(set (match_operand:X 0 "register_operand") (any_or:X (match_operand:X 1 "register_operand" "") - (match_operand:X 2 "arith_or_zbs_operand" "")))] - "") + (match_operand:X 2 "reg_or_const_int_operand" "")))] + "" + +{ + /* If synthesis of the logical op is successful, then no further code + generation is necessary. Else just generate code normally. */ + if (CONST_INT_P (operands[2]) && synthesize_ior_xor (<OPTAB>, operands)) + DONE; +}) (define_insn "*<optab><mode>3" [(set (match_operand:X 0 "register_operand" "=r,r") diff --git a/gcc/fortran/dependency.cc b/gcc/fortran/dependency.cc index 57c0c49..aa8a57a 100644 --- a/gcc/fortran/dependency.cc +++ b/gcc/fortran/dependency.cc @@ -944,8 +944,12 @@ gfc_ref_needs_temporary_p (gfc_ref *ref) types), not in characters. */ return subarray_p; - case REF_COMPONENT: case REF_INQUIRY: + /* Within an array reference, inquiry references of complex + variables generally need a temporary. */ + return subarray_p; + + case REF_COMPONENT: break; } diff --git a/gcc/fortran/trans-types.cc b/gcc/fortran/trans-types.cc index 3374778..f898075 100644 --- a/gcc/fortran/trans-types.cc +++ b/gcc/fortran/trans-types.cc @@ -1140,11 +1140,6 @@ gfc_init_types (void) } gfc_character1_type_node = gfc_character_types[0]; - /* The middle end only recognizes a single unsigned type. For - compatibility of existing test cases, let's just use the - character type. 
The reader of tree dumps is expected to be able - to deal with this. */ - if (flag_unsigned) { for (index = 0; gfc_unsigned_kinds[index].kind != 0;++index) @@ -1159,18 +1154,26 @@ gfc_init_types (void) break; } } - if (index_char > 0) + if (index_char > -1) { - gfc_unsigned_types[index] = gfc_character_types[index_char]; + type = gfc_character_types[index_char]; + if (TYPE_STRING_FLAG (type)) + { + type = build_distinct_type_copy (type); + TYPE_CANONICAL (type) + = TYPE_CANONICAL (gfc_character_types[index_char]); + } + else + type = build_variant_type_copy (type); + TYPE_NAME (type) = NULL_TREE; + TYPE_STRING_FLAG (type) = 0; } else - { - type = gfc_build_unsigned_type (&gfc_unsigned_kinds[index]); - gfc_unsigned_types[index] = type; - snprintf (name_buf, sizeof(name_buf), "unsigned(kind=%d)", - gfc_integer_kinds[index].kind); - PUSH_TYPE (name_buf, type); - } + type = gfc_build_unsigned_type (&gfc_unsigned_kinds[index]); + gfc_unsigned_types[index] = type; + snprintf (name_buf, sizeof(name_buf), "unsigned(kind=%d)", + gfc_integer_kinds[index].kind); + PUSH_TYPE (name_buf, type); } } diff --git a/gcc/testsuite/gcc.dg/pr87600.h b/gcc/testsuite/gcc.dg/pr87600.h index af91f63..c89071eb 100644 --- a/gcc/testsuite/gcc.dg/pr87600.h +++ b/gcc/testsuite/gcc.dg/pr87600.h @@ -7,7 +7,7 @@ #elif defined (__i386__) # define REG1 "%eax" # define REG2 "%edx" -#elif defined (__powerpc__) || defined (__POWERPC__) +#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__) # define REG1 "r3" # define REG2 "r4" #elif defined (__s390__) diff --git a/gcc/testsuite/gcc.dg/pr89313.c b/gcc/testsuite/gcc.dg/pr89313.c index 76cb091..7de64da 100644 --- a/gcc/testsuite/gcc.dg/pr89313.c +++ b/gcc/testsuite/gcc.dg/pr89313.c @@ -8,7 +8,7 @@ # define REG "r0" #elif defined (__i386__) # define REG "%eax" -#elif defined (__powerpc__) || defined (__POWERPC__) +#elif defined (__powerpc__) || defined (__POWERPC__) || defined (__PPC__) # define REG "r3" #elif defined (__s390__) # define REG "0" diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c b/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c new file mode 100644 index 0000000..d71ef5e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr120080.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-fgimple -O2" } */ + +void __GIMPLE (ssa,startwith("switchlower1")) +foo (int b) +{ + __BB(2): + switch (b) {default: L9; case 0: L5; case 5: L5; case 101: L5; } + + __BB(3): +L9: + switch (b) {default: L7; case 5: L6; case 101: L6; } + + __BB(4): +L6: + __builtin_unreachable (); + + __BB(5): +L7: + __builtin_trap (); + + __BB(6): +L5: + return; + +} diff --git a/gcc/testsuite/gcc.target/aarch64/pr99988.c b/gcc/testsuite/gcc.target/aarch64/pr99988.c index 7cca496..c09ce67 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr99988.c +++ b/gcc/testsuite/gcc.target/aarch64/pr99988.c @@ -1,5 +1,5 @@ /* { dg-do compile { target lp64 } } */ -/* { dg-options "-O2 -mbranch-protection=standard" } */ +/* { dg-options "-O2 -mbranch-protection=standard -fno-bit-tests" } */ /* { dg-final { scan-assembler-times {bti j} 13 } } */ int a; int c(); diff --git a/gcc/testsuite/gcc.target/i386/minmax-6.c b/gcc/testsuite/gcc.target/i386/minmax-6.c index 615f919..23f61c5 100644 --- a/gcc/testsuite/gcc.target/i386/minmax-6.c +++ b/gcc/testsuite/gcc.target/i386/minmax-6.c @@ -15,4 +15,4 @@ UMVLine16Y_11 (short unsigned int * Pic, int y, int width) /* We do not want the RA to spill %esi for it's dual-use but using pmaxsd is OK. */ /* { dg-final { scan-assembler-not "rsp" { target { ! 
{ ia32 } } } } } */ -/* { dg-final { scan-assembler "pmaxsd" } } */ +/* { dg-final { scan-assembler "pmaxsd" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/i386/minmax-7.c b/gcc/testsuite/gcc.target/i386/minmax-7.c index 619a939..b2cb1c2 100644 --- a/gcc/testsuite/gcc.target/i386/minmax-7.c +++ b/gcc/testsuite/gcc.target/i386/minmax-7.c @@ -17,4 +17,4 @@ void bar (int aleft, int axcenter) /* We do not want the RA to spill %esi for it's dual-use but using pminsd is OK. */ /* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */ -/* { dg-final { scan-assembler "pminsd" } } */ +/* { dg-final { scan-assembler "pminsd" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c new file mode 100644 index 0000000..04644cd --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/ior-synthesis-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-options "-march=rv64gb -mabi=lp64d" } */ + +unsigned long foo(unsigned long src) { return src | 0x8c00000000000001; } + +/* { dg-final { scan-assembler-times "\\srori\t" 2 } } */ +/* { dg-final { scan-assembler-times "\\sori\t" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c b/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c new file mode 100644 index 0000000..f28fe5e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/ior-synthesis-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-options "-march=rv64gb -mabi=lp64d" } */ + +unsigned long foo(unsigned long src) { return src | 0x8800000000000007; } + +/* { dg-final { scan-assembler-times "\\sbseti\t" 2 } } */ +/* { dg-final { scan-assembler-times "\\sori\t" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c b/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c new file mode 100644 index 0000000..2d5fa88 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/crc-builtin-zvbc.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv_zvbc -mabi=lp64d" } */ + +#include <stdint-gcc.h> + +int8_t crc8_data8 () +{ + return __builtin_crc8_data8 (0x34, 'a', 0x12); +} + +int16_t crc16_data8 () +{ + return __builtin_crc16_data8 (0x1234, 'a', 0x1021); +} + +int16_t crc16_data16 () +{ + return __builtin_crc16_data16 (0x1234, 0x3214, 0x1021); +} + +int32_t crc32_data8 () +{ + return __builtin_crc32_data8 (0xffffffff, 0x32, 0x4002123); +} + +int32_t crc32_data16 () +{ + return __builtin_crc32_data16 (0xffffffff, 0x3232, 0x4002123); +} + +int32_t crc32_data32 () +{ + return __builtin_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123); +} + +int8_t rev_crc8_data8 () +{ + return __builtin_rev_crc8_data8 (0x34, 'a', 0x12); +} + +int16_t rev_crc16_data8 () +{ + return __builtin_rev_crc16_data8 (0x1234, 'a', 0x1021); +} + +int16_t rev_crc16_data16 () +{ + return __builtin_rev_crc16_data16 (0x1234, 0x3214, 0x1021); +} + +int32_t rev_crc32_data8 () +{ + return __builtin_rev_crc32_data8 (0xffffffff, 0x32, 0x4002123); +} + +int32_t rev_crc32_data16 () +{ + return __builtin_rev_crc32_data16 (0xffffffff, 0x3232, 0x4002123); +} + +int32_t rev_crc32_data32 () +{ + return __builtin_rev_crc32_data32 (0xffffffff, 0x123546ff, 0x4002123); +} +/* { dg-final { scan-assembler-times "vclmul.vx" 12 } } */ +/* { dg-final { scan-assembler-times "vclmulh.vx" 12 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c new file mode 100644 index 0000000..c630a79 
--- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-options "-march=rv64gb -mabi=lp64d" } */ + +unsigned long foo(unsigned long src) { return src ^ 0xffffffffefffffffUL; } + +/* { dg-final { scan-assembler-times "\\sbinvi\t" 1 } } */ +/* { dg-final { scan-assembler-times "\\snot\t" 1 } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c new file mode 100644 index 0000000..25457d2 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-options "-march=rv64gb -mabi=lp64d" } */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-O1" "-Og" } } */ + +unsigned long foo(unsigned long src) { return src ^ 0x8800000000000007; } + +/* xfailed until we remove mvconst_internal. */ +/* { dg-final { scan-assembler-times "\\sbinvi\t" 2 { xfail *-*-* } } } */ +/* { dg-final { scan-assembler-times "\\sxori\t" 1 { xfail *-*-* } } } */ + diff --git a/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c b/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c new file mode 100644 index 0000000..765904b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/xor-synthesis-3.c @@ -0,0 +1,8 @@ +/* { dg-do compile { target { rv64 } } } */ +/* { dg-options "-march=rv64gb -mabi=lp64d" } */ + +unsigned long foo(unsigned long src) { return src ^ 0x8c00000000000001; } + +/* { dg-final { scan-assembler-times "\\srori\t" 2 } } */ +/* { dg-final { scan-assembler-times "\\sxori\t" 1 } } */ + diff --git a/gcc/testsuite/gfortran.dg/guality/pr120193.f90 b/gcc/testsuite/gfortran.dg/guality/pr120193.f90 new file mode 100644 index 0000000..e65febf --- /dev/null +++ b/gcc/testsuite/gfortran.dg/guality/pr120193.f90 @@ -0,0 +1,26 @@ +! PR fortran/120193 +! { dg-do run } +! { dg-options "-g -funsigned" } +! { dg-skip-if "" { *-*-* } { "*" } { "-O0" } } + +program foo + unsigned(kind=1) :: a(2), e + unsigned(kind=2) :: b(2), f + unsigned(kind=4) :: c(2), g + unsigned(kind=8) :: d(2), h + character(kind=1, len=1) :: i(2), j + character(kind=4, len=1) :: k(2), l + a = 97u_1 ! { dg-final { gdb-test 24 "a" "d" } } + b = 97u_2 ! { dg-final { gdb-test 24 "b" "c" } } + c = 97u_4 ! { dg-final { gdb-test 24 "c" "b" } } + d = 97u_8 ! { dg-final { gdb-test 24 "d" "a" } } + e = 97u_1 ! { dg-final { gdb-test 24 "e" "97" } } + f = 97u_2 ! { dg-final { gdb-test 24 "f" "97" } } + g = 97u_4 ! { dg-final { gdb-test 24 "g" "97" } } + h = 97u_8 ! { dg-final { gdb-test 24 "h" "97" } } + i = 'a' ! { dg-final { gdb-test 24 "i" "('a', 'a')" } } + j = 'b' ! { dg-final { gdb-test 24 "j" "'b'" } } + k = 'c' + l = 'd' + print *, a +end program diff --git a/gcc/testsuite/gfortran.dg/transfer_array_subref.f90 b/gcc/testsuite/gfortran.dg/transfer_array_subref.f90 new file mode 100644 index 0000000..b480dff --- /dev/null +++ b/gcc/testsuite/gfortran.dg/transfer_array_subref.f90 @@ -0,0 +1,48 @@ +! { dg-do run } +! { dg-additional-options "-O2 -fdump-tree-optimized" } +! +! PR fortran/102891 - passing of inquiry ref of complex array to TRANSFER + +program main + implicit none + integer, parameter :: dp = 8 + + type complex_wrap1 + complex(dp) :: z(2) + end type complex_wrap1 + + type complex_wrap2 + complex(dp), dimension(:), allocatable :: z + end type complex_wrap2 + + type(complex_wrap1) :: x = complex_wrap1([ (1, 2), (3, 4) ]) + type(complex_wrap2) :: w + + w%z = x%z + + ! The following statements should get optimized away... 
+ if (size (transfer ( x%z%re ,[1.0_dp])) /= 2) error stop 1 + if (size (transfer ((x%z%re),[1.0_dp])) /= 2) error stop 2 + if (size (transfer ([x%z%re],[1.0_dp])) /= 2) error stop 3 + if (size (transfer ( x%z%im ,[1.0_dp])) /= 2) error stop 4 + if (size (transfer ((x%z%im),[1.0_dp])) /= 2) error stop 5 + if (size (transfer ([x%z%im],[1.0_dp])) /= 2) error stop 6 + + ! ... while the following may not: + if (any (transfer ( x%z%re ,[1.0_dp]) /= x%z%re)) stop 7 + if (any (transfer ( x%z%im ,[1.0_dp]) /= x%z%im)) stop 8 + + if (size (transfer ( w%z%re ,[1.0_dp])) /= 2) stop 11 + if (size (transfer ((w%z%re),[1.0_dp])) /= 2) stop 12 + if (size (transfer ([w%z%re],[1.0_dp])) /= 2) stop 13 + if (size (transfer ( w%z%im ,[1.0_dp])) /= 2) stop 14 + if (size (transfer ((w%z%im),[1.0_dp])) /= 2) stop 15 + if (size (transfer ([w%z%im],[1.0_dp])) /= 2) stop 16 + + if (any (transfer ( w%z%re ,[1.0_dp]) /= x%z%re)) stop 17 + if (any (transfer ( w%z%im ,[1.0_dp]) /= x%z%im)) stop 18 + + deallocate (w%z) +end program main + +! { dg-final { scan-tree-dump-not "_gfortran_error_stop_numeric" "optimized" } } diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc index dea217a..bd4de96 100644 --- a/gcc/tree-switch-conversion.cc +++ b/gcc/tree-switch-conversion.cc @@ -1793,12 +1793,14 @@ bit_test_cluster::find_bit_tests (vec<cluster *> &clusters, int max_c) end up with as few clusters as possible. */ unsigned l = clusters.length (); - auto_vec<min_cluster_item> min; - min.reserve (l + 1); - gcc_checking_assert (l > 0); + if (l == 0) + return clusters.copy (); gcc_checking_assert (l <= INT_MAX); + auto_vec<min_cluster_item> min; + min.reserve (l + 1); + int bits_in_word = GET_MODE_BITSIZE (word_mode); /* First phase: Compute the minimum number of clusters for each prefix of the |
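
The i386-features.cc hunks change how the scalar-to-vector (STV) pass accounts costs: per-insn decisions now use optimize_bb_for_size_p on the insn's own basic block instead of the global optimize_insn_for_size_p, size gains are expressed in bytes via COSTS_N_BYTES, and latency gains taken from the processor cost tables (normalized, as the new comment says, so that a reg-reg move costs 2) are converted to COSTS_N_INSNS units by halving. A minimal standalone sketch of that unit conversion, with the macros redefined locally (COSTS_N_INSNS as in gcc/rtl.h, COSTS_N_BYTES mirroring the i386 backend's local macro) and made-up table values:

    /* Minimal sketch of the cost units used by the STV gain computation.
       The table values below are illustrative, not taken from any real
       processor_costs entry.  */
    #include <stdio.h>

    #define COSTS_N_INSNS(N) ((N) * 4)	/* One instruction = 4 units.  */
    #define COSTS_N_BYTES(N) ((N) * 2)	/* One byte = 2 units.  */

    struct fake_costs
    {
      int int_load;	/* Latency cost, normalized so reg-reg move == 2.  */
      int sse_load;
    };

    /* Gain from replacing M integer loads with one SSE load.  Positive
       means the vector form wins.  Latency costs are relative to a
       reg-reg move of cost 2, hence the division by 2 to express the
       difference in COSTS_N_INSNS units.  */
    static int
    load_gain (const struct fake_costs *c, int m, int for_size)
    {
      if (for_size)
        /* Integer load is 3+ bytes, SSE load 4+ bytes.  */
        return COSTS_N_BYTES (3 * m - 4);
      return COSTS_N_INSNS (m * c->int_load - c->sse_load) / 2;
    }

    int
    main (void)
    {
      struct fake_costs c = { 4, 6 };
      printf ("speed gain (DImode, -m32, m == 2): %d\n", load_gain (&c, 2, 0));
      printf ("size gain  (DImode, -m32, m == 2): %d\n", load_gain (&c, 2, 1));
      return 0;
    }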
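
The cygming.h and i386.cc hunks move 32-bit Windows away from unconditional stack realignment (STACK_REALIGN_DEFAULT removed, PREFERRED_STACK_BOUNDARY_DEFAULT dropped to MIN_STACK_BOUNDARY), while the force_align_arg_pointer attribute now raises the preferred boundary back to 128 bits. A usage sketch of the opt-in this implies; the callback and vector type are made up, but the attribute is GCC's real force_align_arg_pointer:

    /* A function the OS may call on a 4-byte-aligned stack opts in to
       realignment explicitly so its SSE locals get 16-byte slots.  */
    typedef float v4sf __attribute__ ((vector_size (16)));

    __attribute__ ((force_align_arg_pointer)) void
    win32_callback (const float *in, float *out)
    {
      v4sf tmp;				/* Needs a 16-byte-aligned slot.  */
      __builtin_memcpy (&tmp, in, sizeof tmp);
      tmp = tmp + tmp;
      __builtin_memcpy (out, &tmp, sizeof tmp);
    }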
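
The riscv.cc CRC hunks add a TARGET_ZVBC path that broadcasts the scalar into a vector register and uses vclmul.vx/vclmulh.vx, mirroring the scalar clmul/clmulh sequence. Below is a bit-serial C model of the emitted sequence for the reversed-CRC case; it assumes T0 and T1 are the constants the expander precomputes from the CRC polynomial (the values in main are placeholders, not real CRC constants), and it elides the final masking done when crc_size > data_size:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    clmul (uint64_t a, uint64_t b)	/* Low 64 bits of a clmul b.  */
    {
      uint64_t r = 0;
      for (int i = 0; i < 64; i++)
        if ((b >> i) & 1)
          r ^= a << i;
      return r;
    }

    static uint64_t
    clmulh (uint64_t a, uint64_t b)	/* High 64 bits of a clmul b.  */
    {
      uint64_t r = 0;
      for (int i = 1; i < 64; i++)
        if ((b >> i) & 1)
          r ^= a >> (64 - i);
      return r;
    }

    /* Mirrors expand_reversed_crc_using_clmul: xor in the data, clmul by
       T0, shift left by BITS_PER_WORD - data_size, clmulh by T1.  */
    static uint64_t
    crc_step (uint64_t crc, uint64_t data, int data_size,
              uint64_t t0, uint64_t t1)
    {
      uint64_t a0 = crc ^ data;
      a0 = clmul (a0, t0);
      a0 <<= 64 - data_size;
      return clmulh (a0, t1);
    }

    int
    main (void)
    {
      assert (clmul (3, 3) == 5);	/* (x+1)*(x+1) == x^2 + 1.  */
      assert (clmulh (UINT64_C (1) << 63, 2) == 1);
      (void) crc_step (0xffffffff, 0x32, 8, 0x1234, 0x5678); /* Placeholders. */
      return 0;
    }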
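
synthesize_ior_xor backs the riscv.md expander change from arith_or_zbs_operand to reg_or_const_int_operand: a constant that is neither a small immediate nor a single bit is split into one [x]ori covering the low 11 bits plus one bseti (binvi for XOR) per remaining set bit, when that beats the budget for materializing the constant. A standalone model of that decomposition; the helper name "decompose" is made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    static int
    decompose (uint64_t ival)
    {
      int insns = 0;
      if (ival & 0x7ff)			/* Low 11 bits: one [x]ori.  */
        {
          printf ("[x]ori mask 0x%llx\n", (unsigned long long) (ival & 0x7ff));
          ival &= ~UINT64_C (0x7ff);
          insns++;
        }
      while (ival)			/* One bseti/binvi per bit.  */
        {
          uint64_t bit = ival & -ival;	/* Lowest set bit.  */
          printf ("bseti/binvi bit 0x%llx\n", (unsigned long long) bit);
          ival &= ~bit;
          insns++;
        }
      return insns;
    }

    int
    main (void)
    {
      /* 0x8800000000000007 = bits 0-2 plus bits 59 and 63: one ori and
         two bseti, as gcc.target/riscv/ior-synthesis-2.c expects.  */
      return decompose (UINT64_C (0x8800000000000007)) == 3 ? 0 : 1;
    }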
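
For XOR, the pre-flip path handles constants with nearly all bits set: flip the few bits that must survive, then invert everything, which restores the pre-flipped bits. This rests on the identity x ^ ~m == ~(x ^ m), checked below with bit 28 as the single zero bit of 0xffffffffefffffff, matching gcc.target/riscv/xor-synthesis-1.c (one binvi plus one not):

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
      uint64_t m = UINT64_C (1) << 28;
      for (uint64_t x = 0; x < 100000; x += 37)
        assert ((x ^ ~m) == ~(x ^ m));	/* binvi + not == xor with ~bit.  */
      return 0;
    }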
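
The rotate path handles constants whose set bits fit an 11-bit window only after rotation: rotate the source, apply a single [x]ori with the rotated constant, rotate back, relying on rotl (x, r) | rotl (c, r) == rotl (x | c, r). A checkable sketch with the constant from gcc.target/riscv/ior-synthesis-1.c (two rori plus one ori); the rotation count 10 is worked out from that constant's bit layout:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t
    rotl (uint64_t v, int n)
    {
      return (v << n) | (v >> (64 - n));
    }

    static uint64_t
    rotr (uint64_t v, int n)
    {
      return (v >> n) | (v << (64 - n));
    }

    int
    main (void)
    {
      const uint64_t c = UINT64_C (0x8c00000000000001); /* Bits 63,59,58,0. */
      const int r = 10;
      assert (rotl (c, r) == 0x630);	/* Now fits in the low 11 bits.  */
      for (uint64_t x = 1; x < 100000; x = x * 3 + 1)
        assert (rotr (rotl (x, r) | rotl (c, r), r) == (x | c));
      return 0;
    }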