author | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-09-13 19:49:49 +0200
committer | Thomas Koenig <tkoenig@gcc.gnu.org> | 2021-09-13 19:49:49 +0200
commit | b18a97e5dd0935e1c4a626c230f21457d0aad3d5 (patch)
tree | c1818f41af6fe780deafb6cd6a183f32085fe654 /gcc/expr.c
parent | e76a53644c9d70e998c0d050e9a456af388c6b61 (diff)
Merged current trunk to branch.
Diffstat (limited to 'gcc/expr.c')
-rw-r--r-- | gcc/expr.c | 705 |
1 file changed, 510 insertions, 195 deletions
@@ -1,5 +1,5 @@
 /* Convert tree expression to rtl instructions, for GNU compiler.
-   Copyright (C) 1988-2020 Free Software Foundation, Inc.
+   Copyright (C) 1988-2021 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -62,6 +62,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "ccmp.h"
 #include "gimple-fold.h"
 #include "rtx-vector-builder.h"
+#include "tree-pretty-print.h"
+#include "flags.h"
 
 /* If this is nonzero, we do not bother generating VOLATILE
@@ -234,8 +236,27 @@ convert_move (rtx to, rtx from, int unsignedp)
	  >= GET_MODE_PRECISION (to_int_mode))
       && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
     {
+      scalar_int_mode int_orig_mode;
+      scalar_int_mode int_inner_mode;
+      machine_mode orig_mode = GET_MODE (from);
+
       from = gen_lowpart (to_int_mode, SUBREG_REG (from));
       from_mode = to_int_mode;
+
+      /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+	 the original mode, but narrower than the inner mode.  */
+      if (GET_CODE (from) == SUBREG
+	  && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+	  && GET_MODE_PRECISION (to_int_mode)
+	     > GET_MODE_PRECISION (int_orig_mode)
+	  && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (from)),
				     &int_inner_mode)
+	  && GET_MODE_PRECISION (int_inner_mode)
+	     > GET_MODE_PRECISION (to_int_mode))
+	{
+	  SUBREG_PROMOTED_VAR_P (from) = 1;
+	  SUBREG_PROMOTED_SET (from, unsignedp);
+	}
     }
 
   gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
@@ -686,7 +707,27 @@ convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
       && (GET_MODE_PRECISION (subreg_promoted_mode (x))
	  >= GET_MODE_PRECISION (int_mode))
       && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
-    x = gen_lowpart (int_mode, SUBREG_REG (x));
+    {
+      scalar_int_mode int_orig_mode;
+      scalar_int_mode int_inner_mode;
+      machine_mode orig_mode = GET_MODE (x);
+      x = gen_lowpart (int_mode, SUBREG_REG (x));
+
+      /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
+	 the original mode, but narrower than the inner mode.  */
+      if (GET_CODE (x) == SUBREG
+	  && is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
+	  && GET_MODE_PRECISION (int_mode)
+	     > GET_MODE_PRECISION (int_orig_mode)
+	  && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)),
				     &int_inner_mode)
+	  && GET_MODE_PRECISION (int_inner_mode)
+	     > GET_MODE_PRECISION (int_mode))
+	{
+	  SUBREG_PROMOTED_VAR_P (x) = 1;
+	  SUBREG_PROMOTED_SET (x, unsignedp);
+	}
+    }
 
   if (GET_MODE (x) != VOIDmode)
     oldmode = GET_MODE (x);
@@ -744,7 +785,7 @@ static unsigned int
 alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
 {
   scalar_int_mode tmode
-    = int_mode_for_size (max_pieces * BITS_PER_UNIT, 1).require ();
+    = int_mode_for_size (max_pieces * BITS_PER_UNIT, 0).require ();
 
   if (align >= GET_MODE_ALIGNMENT (tmode))
     align = GET_MODE_ALIGNMENT (tmode);
@@ -767,15 +808,36 @@ alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
   return align;
 }
 
-/* Return the widest integer mode that is narrower than SIZE bytes.  */
+/* Return the widest QI vector, if QI_MODE is true, or integer mode
+   that is narrower than SIZE bytes.  */
 
-static scalar_int_mode
-widest_int_mode_for_size (unsigned int size)
+static fixed_size_mode
+widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
 {
-  scalar_int_mode result = NARROWEST_INT_MODE;
+  fixed_size_mode result = NARROWEST_INT_MODE;
 
   gcc_checking_assert (size > 1);
 
+  /* Use QI vector only if size is wider than a WORD.  */
+  if (qi_vector && size > UNITS_PER_WORD)
+    {
+      machine_mode mode;
+      fixed_size_mode candidate;
+      FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+	if (is_a<fixed_size_mode> (mode, &candidate)
+	    && GET_MODE_INNER (candidate) == QImode)
+	  {
+	    if (GET_MODE_SIZE (candidate) >= size)
+	      break;
+	    if (optab_handler (vec_duplicate_optab, candidate)
+		!= CODE_FOR_nothing)
+	      result = candidate;
+	  }
+
+      if (result != NARROWEST_INT_MODE)
+	return result;
+    }
+
   opt_scalar_int_mode tmode;
   FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT)
     if (GET_MODE_SIZE (tmode.require ()) < size)
@@ -813,12 +875,29 @@ by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
		  unsigned int max_size, by_pieces_operation op)
 {
   unsigned HOST_WIDE_INT n_insns = 0;
+  fixed_size_mode mode;
+
+  if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES)
+    {
+      /* NB: Round up L and ALIGN to the widest integer mode for
+	 MAX_SIZE.  */
+      mode = widest_fixed_size_mode_for_size (max_size,
					      op == SET_BY_PIECES);
+      if (optab_handler (mov_optab, mode) != CODE_FOR_nothing)
+	{
+	  unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode));
+	  if (up > l)
+	    l = up;
+	  align = GET_MODE_ALIGNMENT (mode);
+	}
+    }
 
   align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
 
   while (max_size > 1 && l > 0)
     {
-      scalar_int_mode mode = widest_int_mode_for_size (max_size);
+      mode = widest_fixed_size_mode_for_size (max_size,
					      op == SET_BY_PIECES);
       enum insn_code icode;
 
       unsigned int modesize = GET_MODE_SIZE (mode);
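To see what the piece-splitting above amounts to, here is a minimal C model of the insn counting done by by_pieces_ninsns: split LEN bytes into power-of-two pieces no larger than the maximum piece size, widest first. The helper name ninsns is hypothetical; the real code additionally consults alignment, the target's available modes, and (with this patch) QI vector modes and the overlap hook.

#include <stdio.h>

/* Toy model: one operation per whole piece, widest pieces first.  */
static unsigned
ninsns (unsigned long len, unsigned max_pieces)
{
  unsigned n = 0;
  for (unsigned size = max_pieces; size >= 1; size /= 2)
    {
      n += len / size;   /* whole pieces of this size */
      len %= size;       /* remainder handled by narrower pieces */
    }
  return n;
}

int
main (void)
{
  /* 13 bytes with 8-byte max pieces: one 8-byte, one 4-byte, one
     1-byte operation.  */
  printf ("%u\n", ninsns (13, 8));   /* prints 3 */
  return 0;
}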
@@ -886,7 +965,7 @@ class pieces_addr
   void *m_cfndata;
 public:
   pieces_addr (rtx, bool, by_pieces_constfn, void *);
-  rtx adjust (scalar_int_mode, HOST_WIDE_INT);
+  rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
   void increment_address (HOST_WIDE_INT);
   void maybe_predec (HOST_WIDE_INT);
   void maybe_postinc (HOST_WIDE_INT);
@@ -988,10 +1067,12 @@ pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
    but we still modify the MEM's properties.  */
 
 rtx
-pieces_addr::adjust (scalar_int_mode mode, HOST_WIDE_INT offset)
+pieces_addr::adjust (fixed_size_mode mode, HOST_WIDE_INT offset,
		     by_pieces_prev *prev)
 {
   if (m_constfn)
-    return m_constfn (m_cfndata, offset, mode);
+    /* Pass the previous data to m_constfn.  */
+    return m_constfn (m_cfndata, prev, offset, mode);
   if (m_obj == NULL_RTX)
     return NULL_RTX;
   if (m_auto)
@@ -1039,13 +1120,25 @@ pieces_addr::maybe_postinc (HOST_WIDE_INT size)
 
 class op_by_pieces_d
 {
+ private:
+  fixed_size_mode get_usable_mode (fixed_size_mode, unsigned int);
+  fixed_size_mode smallest_fixed_size_mode_for_size (unsigned int);
+
  protected:
   pieces_addr m_to, m_from;
-  unsigned HOST_WIDE_INT m_len;
+  /* Make m_len read-only so that smallest_fixed_size_mode_for_size can
+     use it to check the valid mode size.  */
+  const unsigned HOST_WIDE_INT m_len;
   HOST_WIDE_INT m_offset;
   unsigned int m_align;
   unsigned int m_max_size;
   bool m_reverse;
+  /* True if this is a stack push.  */
+  bool m_push;
+  /* True if targetm.overlap_op_by_pieces_p () returns true.  */
+  bool m_overlap_op_by_pieces;
+  /* True if QI vector mode can be used.  */
+  bool m_qi_vector_mode;
 
   /* Virtual functions, overridden by derived classes for the specific
     operation.  */
@@ -1056,8 +1149,9 @@ class op_by_pieces_d
   }
 
 public:
-  op_by_pieces_d (rtx, bool, rtx, bool, by_pieces_constfn, void *,
-		  unsigned HOST_WIDE_INT, unsigned int);
+  op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
+		  void *, unsigned HOST_WIDE_INT, unsigned int, bool,
+		  bool = false);
   void run ();
 };
 
@@ -1065,17 +1159,21 @@ class op_by_pieces_d
    objects named TO and FROM, which are identified as loads or stores
    by TO_LOAD and FROM_LOAD.  If FROM is a load, the optional FROM_CFN
    and its associated FROM_CFN_DATA can be used to replace loads with
-   constant values.  LEN describes the length of the operation.  */
+   constant values.  MAX_PIECES describes the maximum number of bytes
+   at a time which can be moved efficiently.  LEN describes the length
+   of the operation.  */
 
-op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
-				rtx from, bool from_load,
+op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
+				bool to_load, rtx from, bool from_load,
				by_pieces_constfn from_cfn,
				void *from_cfn_data,
				unsigned HOST_WIDE_INT len,
-				unsigned int align)
+				unsigned int align, bool push,
+				bool qi_vector_mode)
   : m_to (to, to_load, NULL, NULL),
     m_from (from, from_load, from_cfn, from_cfn_data),
-    m_len (len), m_max_size (MOVE_MAX_PIECES + 1)
+    m_len (len), m_max_size (max_pieces + 1),
+    m_push (push), m_qi_vector_mode (qi_vector_mode)
 {
   int toi = m_to.get_addr_inc ();
   int fromi = m_from.get_addr_inc ();
@@ -1096,7 +1194,9 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
   if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2)
     {
       /* Find the mode of the largest comparison.  */
-      scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+      fixed_size_mode mode
	= widest_fixed_size_mode_for_size (m_max_size,
					   m_qi_vector_mode);
 
       m_from.decide_autoinc (mode, m_reverse, len);
       m_to.decide_autoinc (mode, m_reverse, len);
@@ -1104,6 +1204,56 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
   align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
   m_align = align;
+
+  m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p ();
+}
+
+/* This function returns the largest usable integer mode for LEN bytes
+   whose size is no bigger than size of MODE.  */
+
+fixed_size_mode
+op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
+{
+  unsigned int size;
+  do
+    {
+      size = GET_MODE_SIZE (mode);
+      if (len >= size && prepare_mode (mode, m_align))
+	break;
+      /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
+      mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
+    }
+  while (1);
+  return mode;
+}
+
+/* Return the smallest integer or QI vector mode that is not narrower
+   than SIZE bytes.  */
+
+fixed_size_mode
+op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
+{
+  /* Use QI vector only for > size of WORD.  */
+  if (m_qi_vector_mode && size > UNITS_PER_WORD)
+    {
+      machine_mode mode;
+      fixed_size_mode candidate;
+      FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
+	if (is_a<fixed_size_mode> (mode, &candidate)
+	    && GET_MODE_INNER (candidate) == QImode)
+	  {
+	    /* Don't return a mode wider than M_LEN.  */
+	    if (GET_MODE_SIZE (candidate) > m_len)
+	      break;
+
+	    if (GET_MODE_SIZE (candidate) >= size
+		&& (optab_handler (vec_duplicate_optab, candidate)
+		    != CODE_FOR_nothing))
+	      return candidate;
+	  }
+    }
+
+  return smallest_int_mode_for_size (size * BITS_PER_UNIT);
 }
 
 /* This function contains the main loop used for expanding a block
@@ -1114,50 +1264,98 @@ op_by_pieces_d::op_by_pieces_d (rtx to, bool to_load,
 
 void
 op_by_pieces_d::run ()
 {
-  while (m_max_size > 1 && m_len > 0)
+  if (m_len == 0)
+    return;
+
+  unsigned HOST_WIDE_INT length = m_len;
+
+  /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1.  */
+  fixed_size_mode mode
+    = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
+  mode = get_usable_mode (mode, length);
+
+  by_pieces_prev to_prev = { nullptr, mode };
+  by_pieces_prev from_prev = { nullptr, mode };
+
+  do
     {
-      scalar_int_mode mode = widest_int_mode_for_size (m_max_size);
+      unsigned int size = GET_MODE_SIZE (mode);
+      rtx to1 = NULL_RTX, from1;
 
-      if (prepare_mode (mode, m_align))
+      while (length >= size)
	{
-	  unsigned int size = GET_MODE_SIZE (mode);
-	  rtx to1 = NULL_RTX, from1;
+	  if (m_reverse)
+	    m_offset -= size;
 
-	  while (m_len >= size)
-	    {
-	      if (m_reverse)
-		m_offset -= size;
+	  to1 = m_to.adjust (mode, m_offset, &to_prev);
+	  to_prev.data = to1;
+	  to_prev.mode = mode;
+	  from1 = m_from.adjust (mode, m_offset, &from_prev);
+	  from_prev.data = from1;
+	  from_prev.mode = mode;
 
-	      to1 = m_to.adjust (mode, m_offset);
-	      from1 = m_from.adjust (mode, m_offset);
+	  m_to.maybe_predec (-(HOST_WIDE_INT)size);
+	  m_from.maybe_predec (-(HOST_WIDE_INT)size);
 
-	      m_to.maybe_predec (-(HOST_WIDE_INT)size);
-	      m_from.maybe_predec (-(HOST_WIDE_INT)size);
+	  generate (to1, from1, mode);
 
-	      generate (to1, from1, mode);
+	  m_to.maybe_postinc (size);
+	  m_from.maybe_postinc (size);
 
-	      m_to.maybe_postinc (size);
-	      m_from.maybe_postinc (size);
+	  if (!m_reverse)
+	    m_offset += size;
 
-	      if (!m_reverse)
-		m_offset += size;
+	  length -= size;
+	}
 
-	      m_len -= size;
-	    }
+      finish_mode (mode);
 
-	  finish_mode (mode);
-	}
+      if (length == 0)
	return;
 
-      m_max_size = GET_MODE_SIZE (mode);
+      if (!m_push && m_overlap_op_by_pieces)
+	{
+	  /* NB: Generate overlapping operations if it is not a stack
+	     push since stack push must not overlap.  Get the smallest
+	     fixed size mode for M_LEN bytes.  */
+	  mode = smallest_fixed_size_mode_for_size (length);
+	  mode = get_usable_mode (mode, GET_MODE_SIZE (mode));
+	  int gap = GET_MODE_SIZE (mode) - length;
+	  if (gap > 0)
+	    {
+	      /* If size of MODE > M_LEN, generate the last operation
+		 in MODE for the remaining bytes with overlapping memory
+		 from the previous operation.  */
+	      if (m_reverse)
+		m_offset += gap;
+	      else
+		m_offset -= gap;
+	      length += gap;
+	    }
+	}
+      else
+	{
+	  /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
+	  mode = widest_fixed_size_mode_for_size (size,
						  m_qi_vector_mode);
	  mode = get_usable_mode (mode, length);
+	}
    }
+  while (1);
 
   /* The code above should have handled everything.  */
-  gcc_assert (!m_len);
+  gcc_assert (!length);
 }
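The overlap branch in run () above replaces the classic 8+4+1 tail splitting with one wider operation that partially overwrites bytes already stored, which is safe for moves and sets (but not stack pushes, hence the m_push guard). A minimal C sketch of the idea, with memcpy standing in for the piecewise move insns and the helper name set13 being hypothetical:

#include <stdio.h>
#include <string.h>
#include <stdint.h>

/* Fill 13 bytes with two 8-byte stores: bytes 0-7, then bytes 5-12.
   The second store overlaps bytes 5-7 already written.  */
static void
set13 (unsigned char *dst, uint64_t pattern)
{
  memcpy (dst, &pattern, 8);            /* bytes 0-7 */
  memcpy (dst + 13 - 8, &pattern, 8);   /* bytes 5-12, overlapping */
}

int
main (void)
{
  unsigned char buf[13];
  uint64_t pat;
  memset (&pat, 0xab, sizeof pat);
  set13 (buf, pat);
  for (int i = 0; i < 13; i++)
    printf ("%02x", buf[i]);            /* prints ab thirteen times */
  printf ("\n");
  return 0;
}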
 /* Derived class from op_by_pieces_d, providing support for block move
    operations.  */
 
+#ifdef PUSH_ROUNDING
+#define PUSHG_P(to)  ((to) == nullptr)
+#else
+#define PUSHG_P(to)  false
+#endif
+
 class move_by_pieces_d : public op_by_pieces_d
 {
   insn_gen_fn m_gen_fun;
@@ -1167,7 +1365,8 @@ class move_by_pieces_d : public op_by_pieces_d
  public:
   move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
		    unsigned int align)
-    : op_by_pieces_d (to, false, from, true, NULL, NULL, len, align)
+    : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
+		      NULL, len, align, PUSHG_P (to))
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1261,8 +1460,10 @@ class store_by_pieces_d : public op_by_pieces_d
  public:
   store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
-		     unsigned HOST_WIDE_INT len, unsigned int align)
-    : op_by_pieces_d (to, false, NULL_RTX, true, cfn, cfn_data, len, align)
+		     unsigned HOST_WIDE_INT len, unsigned int align,
+		     bool qi_vector_mode)
+    : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
+		      cfn_data, len, align, false, qi_vector_mode)
   {
   }
   rtx finish_retmode (memop_ret);
@@ -1317,7 +1518,7 @@ store_by_pieces_d::finish_retmode (memop_ret retmode)
 
 int
 can_store_by_pieces (unsigned HOST_WIDE_INT len,
-		     rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+		     by_pieces_constfn constfun,
		     void *constfundata, unsigned int align, bool memsetp)
 {
   unsigned HOST_WIDE_INT l;
@@ -1351,7 +1552,8 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
       max_size = STORE_MAX_PIECES + 1;
       while (max_size > 1 && l > 0)
	{
-	  scalar_int_mode mode = widest_int_mode_for_size (max_size);
+	  fixed_size_mode mode
+	    = widest_fixed_size_mode_for_size (max_size, memsetp);
 
	  icode = optab_handler (mov_optab, mode);
	  if (icode != CODE_FOR_nothing
@@ -1364,8 +1566,12 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
	      if (reverse)
		offset -= size;
 
-	      cst = (*constfun) (constfundata, offset, mode);
-	      if (!targetm.legitimate_constant_p (mode, cst))
+	      cst = (*constfun) (constfundata, nullptr, offset, mode);
+	      /* All CONST_VECTORs can be loaded for memset since
+		 vec_duplicate_optab is a precondition to pick a
+		 vector mode for the memset expander.  */
+	      if (!((memsetp && VECTOR_MODE_P (mode))
+		    || targetm.legitimate_constant_p (mode, cst)))
		return 0;
 
	      if (!reverse)
@@ -1394,7 +1600,7 @@ can_store_by_pieces (unsigned HOST_WIDE_INT len,
 
 rtx
 store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
-		 rtx (*constfun) (void *, HOST_WIDE_INT, scalar_int_mode),
+		 by_pieces_constfn constfun,
		 void *constfundata, unsigned int align, bool memsetp,
		 memop_ret retmode)
 {
@@ -1409,7 +1615,8 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
			   memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
			   optimize_insn_for_speed_p ()));
 
-  store_by_pieces_d data (to, constfun, constfundata, len, align);
+  store_by_pieces_d data (to, constfun, constfundata, len, align,
			  memsetp);
   data.run ();
 
   if (retmode != RETURN_BEGIN)
@@ -1418,15 +1625,6 @@ store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
   return to;
 }
 
-/* Callback routine for clear_by_pieces.
-   Return const0_rtx unconditionally.  */
-
-static rtx
-clear_by_pieces_1 (void *, HOST_WIDE_INT, scalar_int_mode)
-{
-  return const0_rtx;
-}
-
 /* Generate several move instructions to clear LEN bytes of block TO.
    (A MEM rtx with BLKmode).  ALIGN is maximum alignment we can assume.  */
 
@@ -1436,7 +1634,10 @@ clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
   if (len == 0)
     return;
 
-  store_by_pieces_d data (to, clear_by_pieces_1, NULL, len, align);
+  /* Use builtin_memset_read_str to support vector mode broadcast.  */
+  char c = 0;
+  store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align,
			  true);
  data.run ();
 }
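For integer piece modes, the memset constant callback has to replicate the fill byte across the whole piece; for the new vector modes, vec_duplicate does the same job in one insn. A rough C illustration of the integer-mode case (the helper name broadcast_byte is hypothetical, not the GCC callback):

#include <stdint.h>
#include <stdio.h>

/* Replicate byte C into every byte of a 64-bit piece, the usual
   multiply-by-0x0101...01 trick.  */
static uint64_t
broadcast_byte (unsigned char c)
{
  return (uint64_t) c * 0x0101010101010101ULL;
}

int
main (void)
{
  printf ("%016llx\n", (unsigned long long) broadcast_byte (0xab));
  return 0;   /* prints abababababababab */
}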
@@ -1458,7 +1659,8 @@ class compare_by_pieces_d : public op_by_pieces_d
   compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
		       void *op1_cfn_data, HOST_WIDE_INT len, int align,
		       rtx_code_label *fail_label)
-    : op_by_pieces_d (op0, true, op1, true, op1_cfn, op1_cfn_data, len, align)
+    : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
+		      op1_cfn_data, len, align, false)
   {
     m_fail_label = fail_label;
   }
@@ -1727,7 +1929,7 @@ block_move_libcall_safe_for_call_parm (void)
   tree fn;
 
   /* If arguments are pushed on the stack, then they're safe.  */
-  if (PUSH_ARGS)
+  if (targetm.calls.push_argument (0))
     return true;
 
   /* If registers go on the stack anyway, any argument is sure to clobber
@@ -3054,7 +3256,8 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
		     unsigned int expected_align, HOST_WIDE_INT expected_size,
		     unsigned HOST_WIDE_INT min_size,
		     unsigned HOST_WIDE_INT max_size,
-		     unsigned HOST_WIDE_INT probable_max_size)
+		     unsigned HOST_WIDE_INT probable_max_size,
+		     unsigned ctz_size)
 {
   machine_mode mode = GET_MODE (object);
   unsigned int align;
@@ -3101,6 +3304,10 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
				   expected_align, expected_size,
				   min_size, max_size, probable_max_size))
     ;
+  else if (try_store_by_multiple_pieces (object, size, ctz_size,
					 min_size, max_size,
					 NULL_RTX, 0, align))
+    ;
   else if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (object)))
     return set_storage_via_libcall (object, size, const0_rtx,
				    method == BLOCK_OP_TAILCALL);
@@ -3118,7 +3325,7 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
     min = max = UINTVAL (size);
   else
     max = GET_MODE_MASK (GET_MODE (size));
-  return clear_storage_hints (object, size, method, 0, -1, min, max, max);
+  return clear_storage_hints (object, size, method, 0, -1, min, max, max, 0);
 }
 
@@ -4538,11 +4745,19 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
       skip = (reg_parm_stack_space == 0) ? 0 : used;
 
 #ifdef PUSH_ROUNDING
+      /* NB: Let the backend know the number of bytes to push and
+	 decide if push insns should be generated.  */
+      unsigned int push_size;
+      if (CONST_INT_P (size))
+	push_size = INTVAL (size);
+      else
+	push_size = 0;
+
       /* Do it with several push insns if that doesn't take lots of insns
	 and if there is no difficulty with push insns that skip bytes
	 on the stack for alignment purposes.  */
       if (args_addr == 0
-	  && PUSH_ARGS
+	  && targetm.calls.push_argument (push_size)
	  && CONST_INT_P (size)
	  && skip == 0
	  && MEM_ALIGN (xinner) >= align
@@ -4747,7 +4962,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
     anti_adjust_stack (gen_int_mode (extra, Pmode));
 
 #ifdef PUSH_ROUNDING
-  if (args_addr == 0 && PUSH_ARGS)
+  if (args_addr == 0 && targetm.calls.push_argument (0))
     emit_single_push_insn (mode, x, type);
   else
 #endif
@@ -5457,6 +5672,7 @@ expand_assignment (tree to, tree from, bool nontemporal)
   /* If to_rtx is a promoted subreg, we need to zero or sign
      extend the value afterwards.  */
   if (TREE_CODE (to) == MEM_REF
+      && TYPE_MODE (TREE_TYPE (from)) != BLKmode
       && !REF_REVERSE_STORAGE_ORDER (to)
       && known_eq (bitpos, 0)
       && known_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (to_rtx))))
@@ -5643,7 +5859,8 @@ emit_storent_insn (rtx to, rtx from)
 
 /* Helper function for store_expr storing of STRING_CST.  */
 static rtx
-string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
+string_cst_read_str (void *data, void *, HOST_WIDE_INT offset,
		     fixed_size_mode mode)
 {
   tree str = (tree) data;
 
@@ -5658,10 +5875,13 @@ string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode)
       size_t l = TREE_STRING_LENGTH (str) - offset;
       memcpy (p, TREE_STRING_POINTER (str) + offset, l);
       memset (p + l, '\0', GET_MODE_SIZE (mode) - l);
-      return c_readstr (p, mode, false);
+      return c_readstr (p, as_a <scalar_int_mode> (mode), false);
     }
 
-  return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false);
+  /* The by-pieces infrastructure does not try to pick a vector mode
+     for storing STRING_CST.  */
+  return c_readstr (TREE_STRING_POINTER (str) + offset,
		    as_a <scalar_int_mode> (mode), false);
 }
 
 /* Generate code for computing expression EXP,
@@ -6967,7 +7187,8 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
	  && eltmode == GET_MODE_INNER (mode)
	  && ((icode = optab_handler (vec_duplicate_optab, mode))
	      != CODE_FOR_nothing)
-	  && (elt = uniform_vector_p (exp)))
+	  && (elt = uniform_vector_p (exp))
+	  && !VECTOR_TYPE_P (TREE_TYPE (elt)))
	{
	  class expand_operand ops[2];
	  create_output_operand (&ops[0], target, mode);
@@ -7016,7 +7237,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
	    /* Compute the size of the elements in the CTOR.  It differs
	       from the size of the vector type elements only when the
	       CTOR elements are vectors themselves.  */
-	    tree val_type = TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value);
+	    tree val_type = (CONSTRUCTOR_NELTS (exp) != 0
			     ? TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)
			     : elttype);
	    if (VECTOR_TYPE_P (val_type))
	      bitsize = tree_to_uhwi (TYPE_SIZE (val_type));
	    else
@@ -7211,7 +7434,13 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
	      || !multiple_p (bitpos, BITS_PER_UNIT)
	      || !poly_int_tree_p (DECL_SIZE (TREE_OPERAND (exp, 1)),
				   &decl_bitsize)
-	      || maybe_ne (decl_bitsize, bitsize)))
+	      || maybe_ne (decl_bitsize, bitsize))
+	  /* A call with an addressable return type and return-slot
+	     optimization must not need bitfield operations but we must
+	     pass down the original target.  */
+	  && (TREE_CODE (exp) != CALL_EXPR
+	      || !TREE_ADDRESSABLE (TREE_TYPE (exp))
+	      || !CALL_EXPR_RETURN_SLOT_OPT (exp)))
	  /* If we are expanding a MEM_REF of a non-BLKmode non-addressable
	     decl we must use bitfield operations.  */
	  || (known_size_p (bitsize)
@@ -8195,7 +8424,7 @@ expand_expr_addr_expr_1 (tree exp, rtx target, scalar_int_mode tmode,
	 array with address of COMPOUND_LITERAL_EXPR in DECL_INITIAL;
	 the initializers aren't gimplified.  */
      if (COMPOUND_LITERAL_EXPR_DECL (exp)
-	  && TREE_STATIC (COMPOUND_LITERAL_EXPR_DECL (exp)))
+	  && is_global_var (COMPOUND_LITERAL_EXPR_DECL (exp)))
	return expand_expr_addr_expr_1 (COMPOUND_LITERAL_EXPR_DECL (exp),
					target, tmode, modifier, as);
      /* FALLTHRU */
@@ -8412,6 +8641,19 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier,
      return constructor;
    }
 
+  /* If the CTOR is available in static storage and not mostly
+     zeros and we can move it by pieces prefer to do so since
+     that's usually more efficient than performing a series of
+     stores from immediates.  */
+  if (avoid_temp_mem
+      && TREE_STATIC (exp)
+      && TREE_CONSTANT (exp)
+      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))
+      && can_move_by_pieces (tree_to_uhwi (TYPE_SIZE_UNIT (type)),
			     TYPE_ALIGN (type))
+      && ! mostly_zeros_p (exp))
+    return NULL_RTX;
+
   /* Handle calls that pass values in multiple non-contiguous
      locations.  The Irix 6 ABI has examples of this.  */
   if (target == 0 || ! safe_from_p (target, exp, 1)
@@ -8653,6 +8895,56 @@ expand_misaligned_mem_ref (rtx temp, machine_mode mode, int unsignedp,
   return temp;
 }
 
+/* Helper function of expand_expr_2, expand a division or modulo.
+   op0 and op1 should be already expanded treeop0 and treeop1, using
+   expand_operands.  */
+
+static rtx
+expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
		    tree treeop1, rtx op0, rtx op1, rtx target, int unsignedp)
+{
+  bool mod_p = (code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
		|| code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR);
+  if (SCALAR_INT_MODE_P (mode)
+      && optimize >= 2
+      && get_range_pos_neg (treeop0) == 1
+      && get_range_pos_neg (treeop1) == 1)
+    {
+      /* If both arguments are known to be positive when interpreted
+	 as signed, we can expand it as both signed and unsigned
+	 division or modulo.  Choose the cheaper sequence in that case.  */
+      bool speed_p = optimize_insn_for_speed_p ();
+      do_pending_stack_adjust ();
+      start_sequence ();
+      rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
+      rtx_insn *uns_insns = get_insns ();
+      end_sequence ();
+      start_sequence ();
+      rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
+      rtx_insn *sgn_insns = get_insns ();
+      end_sequence ();
+      unsigned uns_cost = seq_cost (uns_insns, speed_p);
+      unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
+
+      /* If costs are the same then use as tie breaker the other
+	 factor.  */
+      if (uns_cost == sgn_cost)
+	{
+	  uns_cost = seq_cost (uns_insns, !speed_p);
+	  sgn_cost = seq_cost (sgn_insns, !speed_p);
+	}
+
+      if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
+	{
+	  emit_insn (uns_insns);
+	  return uns_ret;
+	}
+      emit_insn (sgn_insns);
+      return sgn_ret;
+    }
+  return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
+}
+
 rtx
 expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
		    enum expand_modifier modifier)
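The key fact expand_expr_divmod relies on: when both operands are known non-negative, signed and unsigned division (and modulo) compute the same value, so the expander can emit whichever sequence is cheaper. A minimal C check of that equivalence:

#include <assert.h>

int
main (void)
{
  int x = 1000, y = 7;
  /* Both operands non-negative: the two expansions agree.  */
  assert (x / y == (int) ((unsigned) x / (unsigned) y));
  assert (x % y == (int) ((unsigned) x % (unsigned) y));
  return 0;
}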
@@ -9190,14 +9482,78 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
	  if (!REG_P (op0))
	    op0 = copy_to_mode_reg (mode, op0);
 
-	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0,
-			       gen_int_mode (tree_to_shwi (exp1),
					     TYPE_MODE (TREE_TYPE (exp1)))));
+	  op1 = gen_int_mode (tree_to_shwi (exp1),
			      TYPE_MODE (TREE_TYPE (exp1)));
+	  return REDUCE_BIT_FIELD (gen_rtx_MULT (mode, op0, op1));
	}
 
      if (modifier == EXPAND_STACK_PARM)
	target = 0;
 
+      if (SCALAR_INT_MODE_P (mode) && optimize >= 2)
+	{
+	  gimple *def_stmt0 = get_def_for_expr (treeop0, TRUNC_DIV_EXPR);
+	  gimple *def_stmt1 = get_def_for_expr (treeop1, TRUNC_DIV_EXPR);
+	  if (def_stmt0
+	      && !operand_equal_p (treeop1, gimple_assign_rhs2 (def_stmt0), 0))
+	    def_stmt0 = NULL;
+	  if (def_stmt1
+	      && !operand_equal_p (treeop0, gimple_assign_rhs2 (def_stmt1), 0))
+	    def_stmt1 = NULL;
+
+	  if (def_stmt0 || def_stmt1)
+	    {
+	      /* X / Y * Y can be expanded as X - X % Y too.
		 Choose the cheaper sequence of those two.  */
+	      if (def_stmt0)
+		treeop0 = gimple_assign_rhs1 (def_stmt0);
+	      else
+		{
+		  treeop1 = treeop0;
+		  treeop0 = gimple_assign_rhs1 (def_stmt1);
+		}
+	      expand_operands (treeop0, treeop1, subtarget, &op0, &op1,
			       EXPAND_NORMAL);
+	      bool speed_p = optimize_insn_for_speed_p ();
+	      do_pending_stack_adjust ();
+	      start_sequence ();
+	      rtx divmul_ret
		= expand_expr_divmod (TRUNC_DIV_EXPR, mode, treeop0, treeop1,
				      op0, op1, NULL_RTX, unsignedp);
+	      divmul_ret = expand_mult (mode, divmul_ret, op1, target,
					unsignedp);
+	      rtx_insn *divmul_insns = get_insns ();
+	      end_sequence ();
+	      start_sequence ();
+	      rtx modsub_ret
		= expand_expr_divmod (TRUNC_MOD_EXPR, mode, treeop0, treeop1,
				      op0, op1, NULL_RTX, unsignedp);
+	      this_optab = optab_for_tree_code (MINUS_EXPR, type,
						optab_default);
+	      modsub_ret = expand_binop (mode, this_optab, op0, modsub_ret,
					 target, unsignedp, OPTAB_LIB_WIDEN);
+	      rtx_insn *modsub_insns = get_insns ();
+	      end_sequence ();
+	      unsigned divmul_cost = seq_cost (divmul_insns, speed_p);
+	      unsigned modsub_cost = seq_cost (modsub_insns, speed_p);
+	      /* If costs are the same then use as tie breaker the other
		 factor.  */
+	      if (divmul_cost == modsub_cost)
+		{
+		  divmul_cost = seq_cost (divmul_insns, !speed_p);
+		  modsub_cost = seq_cost (modsub_insns, !speed_p);
+		}
+
+	      if (divmul_cost <= modsub_cost)
+		{
+		  emit_insn (divmul_insns);
+		  return REDUCE_BIT_FIELD (divmul_ret);
+		}
+	      emit_insn (modsub_insns);
+	      return REDUCE_BIT_FIELD (modsub_ret);
+	    }
+	}
+
      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
      return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
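The rewrite above rests on the C-style truncating division identity: (x / y) * y == x - x % y, which holds by the definition of % for integer types. A minimal check:

#include <assert.h>

int
main (void)
{
  /* Truncating division: x / y * y and x - x % y always agree.  */
  for (int x = -20; x <= 20; x++)
    for (int y = 1; y <= 5; y++)
      assert (x / y * y == x - x % y);
  return 0;
}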
@@ -9211,61 +9567,21 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    case CEIL_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
-      {
-	/* If this is a fixed-point operation, then we cannot use the code
-	   below because "expand_divmod" doesn't support sat/no-sat fixed-point
-	   divisions.  */
-	if (ALL_FIXED_POINT_MODE_P (mode))
-	  goto binop;
-
-	if (modifier == EXPAND_STACK_PARM)
-	  target = 0;
-	/* Possible optimization: compute the dividend with EXPAND_SUM
-	   then if the divisor is constant can optimize the case
-	   where some terms of the dividend have coeffs divisible by it.  */
-	expand_operands (treeop0, treeop1,
			 subtarget, &op0, &op1, EXPAND_NORMAL);
-	bool mod_p = code == TRUNC_MOD_EXPR || code == FLOOR_MOD_EXPR
		     || code == CEIL_MOD_EXPR || code == ROUND_MOD_EXPR;
-	if (SCALAR_INT_MODE_P (mode)
-	    && optimize >= 2
-	    && get_range_pos_neg (treeop0) == 1
-	    && get_range_pos_neg (treeop1) == 1)
-	  {
-	    /* If both arguments are known to be positive when interpreted
-	       as signed, we can expand it as both signed and unsigned
-	       division or modulo.  Choose the cheaper sequence in that case.  */
-	    bool speed_p = optimize_insn_for_speed_p ();
-	    do_pending_stack_adjust ();
-	    start_sequence ();
-	    rtx uns_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 1);
-	    rtx_insn *uns_insns = get_insns ();
-	    end_sequence ();
-	    start_sequence ();
-	    rtx sgn_ret = expand_divmod (mod_p, code, mode, op0, op1, target, 0);
-	    rtx_insn *sgn_insns = get_insns ();
-	    end_sequence ();
-	    unsigned uns_cost = seq_cost (uns_insns, speed_p);
-	    unsigned sgn_cost = seq_cost (sgn_insns, speed_p);
-
-	    /* If costs are the same then use as tie breaker the other
-	       other factor.  */
-	    if (uns_cost == sgn_cost)
-	      {
-		uns_cost = seq_cost (uns_insns, !speed_p);
-		sgn_cost = seq_cost (sgn_insns, !speed_p);
-	      }
-
-	    if (uns_cost < sgn_cost || (uns_cost == sgn_cost && unsignedp))
-	      {
-		emit_insn (uns_insns);
-		return uns_ret;
-	      }
-	    emit_insn (sgn_insns);
-	    return sgn_ret;
-	  }
-	return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
-      }
+      /* If this is a fixed-point operation, then we cannot use the code
+	 below because "expand_divmod" doesn't support sat/no-sat fixed-point
+	 divisions.  */
+      if (ALL_FIXED_POINT_MODE_P (mode))
+	goto binop;
+
+      if (modifier == EXPAND_STACK_PARM)
+	target = 0;
+      /* Possible optimization: compute the dividend with EXPAND_SUM
+	 then if the divisor is constant can optimize the case
+	 where some terms of the dividend have coeffs divisible by it.  */
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      return expand_expr_divmod (code, mode, treeop0, treeop1, op0, op1,
				 target, unsignedp);
 
    case RDIV_EXPR:
      goto binop;
 
@@ -9800,6 +10116,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
    case VEC_PACK_SAT_EXPR:
    case VEC_PACK_FIX_TRUNC_EXPR:
      mode = TYPE_MODE (TREE_TYPE (treeop0));
+      subtarget = NULL_RTX;
      goto binop;
 
    case VEC_PACK_TRUNC_EXPR:
@@ -9823,6 +10140,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
	  return eops[0].value;
	}
      mode = TYPE_MODE (TREE_TYPE (treeop0));
+      subtarget = NULL_RTX;
      goto binop;
 
    case VEC_PACK_FLOAT_EXPR:
@@ -10221,8 +10539,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
	exp = SSA_NAME_VAR (ssa_name);
      goto expand_decl_rtl;
 
-    case PARM_DECL:
    case VAR_DECL:
+      /* Allow accel compiler to handle variables that require special
+	 treatment, e.g. if they have been modified in some way earlier in
+	 compilation by the adjust_private_decl OpenACC hook.  */
+      if (flag_openacc && targetm.goacc.expand_var_decl)
+	{
+	  temp = targetm.goacc.expand_var_decl (exp);
+	  if (temp)
+	    return temp;
+	}
+      /* ... fall through ... */
+
+    case PARM_DECL:
      /* If a static var's type was incomplete when the decl was written,
	 but the type is complete now, lay out the decl now.  */
      if (DECL_SIZE (exp) == 0
@@ -10764,61 +11093,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
      goto normal_inner_ref;
 
    case COMPONENT_REF:
-      /* If the operand is a CONSTRUCTOR, we can just extract the
-	 appropriate field if it is present.  */
-      if (TREE_CODE (treeop0) == CONSTRUCTOR)
-	{
-	  unsigned HOST_WIDE_INT idx;
-	  tree field, value;
-	  scalar_int_mode field_mode;
-
-	  FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (treeop0),
				    idx, field, value)
-	    if (field == treeop1
		/* We can normally use the value of the field in the
		   CONSTRUCTOR.  However, if this is a bitfield in
		   an integral mode that we can fit in a HOST_WIDE_INT,
		   we must mask only the number of bits in the bitfield,
		   since this is done implicitly by the constructor.  If
		   the bitfield does not meet either of those conditions,
		   we can't do this optimization.  */
		&& (! DECL_BIT_FIELD (field)
		    || (is_int_mode (DECL_MODE (field), &field_mode)
			&& (GET_MODE_PRECISION (field_mode)
			    <= HOST_BITS_PER_WIDE_INT))))
-	      {
-		if (DECL_BIT_FIELD (field)
		    && modifier == EXPAND_STACK_PARM)
-		  target = 0;
-		op0 = expand_expr (value, target, tmode, modifier);
-		if (DECL_BIT_FIELD (field))
-		  {
-		    HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (DECL_SIZE (field));
-		    scalar_int_mode imode
		      = SCALAR_INT_TYPE_MODE (TREE_TYPE (field));
-
-		    if (TYPE_UNSIGNED (TREE_TYPE (field)))
-		      {
-			op1 = gen_int_mode ((HOST_WIDE_INT_1 << bitsize) - 1,
					    imode);
-			op0 = expand_and (imode, op0, op1, target);
-		      }
-		    else
-		      {
-			int count = GET_MODE_PRECISION (imode) - bitsize;
-
-			op0 = expand_shift (LSHIFT_EXPR, imode, op0, count,
					    target, 0);
-			op0 = expand_shift (RSHIFT_EXPR, imode, op0, count,
					    target, 0);
-		      }
-		  }
-
-		return op0;
-	      }
-	}
-      goto normal_inner_ref;
-
+      gcc_assert (TREE_CODE (treeop0) != CONSTRUCTOR);
+      /* Fall through.  */
    case BIT_FIELD_REF:
    case ARRAY_RANGE_REF:
    normal_inner_ref:
@@ -11235,7 +11511,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
	  /* All valid uses of __builtin_va_arg_pack () are removed during
	     inlining.  */
	  if (CALL_EXPR_VA_ARG_PACK (exp))
-	    error ("%Kinvalid use of %<__builtin_va_arg_pack ()%>", exp);
+	    error ("invalid use of %<__builtin_va_arg_pack ()%>");
	  {
	    tree fndecl = get_callee_fndecl (exp), attr;
 
@@ -11247,7 +11523,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
				      DECL_ATTRIBUTES (fndecl))) != NULL)
	      {
		const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
-		error ("%Kcall to %qs declared with attribute error: %s", exp,
+		error ("call to %qs declared with attribute error: %s",
		       identifier_to_locale (ident),
		       TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
	      }
@@ -11259,10 +11535,10 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
				      DECL_ATTRIBUTES (fndecl))) != NULL)
	      {
		const char *ident = lang_hooks.decl_printable_name (fndecl, 1);
-		warning_at (tree_nonartificial_location (exp),
+		warning_at (EXPR_LOCATION (exp),
			    OPT_Wattribute_warning,
-			    "%Kcall to %qs declared with attribute warning: %s",
-			    exp, identifier_to_locale (ident),
+			    "call to %qs declared with attribute warning: %s",
+			    identifier_to_locale (ident),
			    TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))));
	      }
 
@@ -12275,6 +12551,37 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
  *arg1 = c4;
  return code == EQ_EXPR ? LE_EXPR : GT_EXPR;
 }
+
+/* Optimize x - y < 0 into x < y if x - y has undefined overflow.  */
+
+void
+maybe_optimize_sub_cmp_0 (enum tree_code code, tree *arg0, tree *arg1)
+{
+  gcc_checking_assert (code == GT_EXPR || code == GE_EXPR
		       || code == LT_EXPR || code == LE_EXPR);
+  gcc_checking_assert (integer_zerop (*arg1));
+
+  if (!optimize)
+    return;
+
+  gimple *stmt = get_def_for_expr (*arg0, MINUS_EXPR);
+  if (stmt == NULL)
+    return;
+
+  tree treeop0 = gimple_assign_rhs1 (stmt);
+  tree treeop1 = gimple_assign_rhs2 (stmt);
+  if (!TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (treeop0)))
+    return;
+
+  if (issue_strict_overflow_warning (WARN_STRICT_OVERFLOW_COMPARISON))
+    warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
		"assuming signed overflow does not occur when "
		"simplifying %<X - Y %s 0%> to %<X %s Y%>",
		op_symbol_code (code), op_symbol_code (code));
+
+  *arg0 = treeop0;
+  *arg1 = treeop1;
+}
 
 /* Generate code to calculate OPS, an exploded expression
    using a store-flag instruction and return an rtx for the result.
@@ -12363,6 +12670,14 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
	}
    }
 
+  /* Optimize (x - y) < 0 into x < y if x - y has undefined overflow.  */
+  if (!unsignedp
+      && (ops->code == LT_EXPR || ops->code == LE_EXPR
	  || ops->code == GT_EXPR || ops->code == GE_EXPR)
+      && integer_zerop (arg1)
+      && TREE_CODE (arg0) == SSA_NAME)
+    maybe_optimize_sub_cmp_0 (ops->code, &arg0, &arg1);
+
   /* Get the rtx comparison code to use.  We know that EXP is a comparison
      operation of some type.  Some comparisons against 1 and -1 can be
      converted to comparisons with zero.  Do so here so that the tests
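Why maybe_optimize_sub_cmp_0 needs undefined signed overflow: under wrapping arithmetic, (x - y) < 0 and x < y can disagree. A minimal C sketch, assuming 32-bit int, that simulates the wrapping subtraction with unsigned arithmetic to show the divergence:

#include <limits.h>
#include <stdio.h>

int
main (void)
{
  int x = INT_MIN, y = 1;
  /* Wrapping x - y computed via unsigned arithmetic: INT_MIN - 1
     wraps around to INT_MAX, whose sign bit is clear.  */
  unsigned ux = (unsigned) x - (unsigned) y;
  printf ("x < y: %d\n", x < y);                    /* prints 1 */
  printf ("wrapped (x - y) < 0: %d\n",
	  (int) (ux >> 31));                        /* prints 0 */
  return 0;
}

Because signed overflow is undefined in C, the compiler may assume x - y does not wrap, which makes the fold to x < y valid (and is why the pass emits a -Wstrict-overflow note when asked).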