expansion: Further improve double-word modulo, division and divmod [PR97459]

The following patch implements what Thomas wrote about, in particular that we can handle also double-word divison by the constants for which the earlier patch optimized modulo (if it would be otherwise a library call) and that we can also easily handle such constants shifted to the left. Unfortunately, seems CSE isn't able to optimize away the two almost identical sequences (one to compute remainder, one to compute quotient), probably because of the ADD_OVERFLOW introduced jumps, so the patch also adjusts expand_DIVMOD. 2020-12-02 Jakub Jelinek <jakub@redhat.com> PR rtl-optimization/97459 * optabs.h (expand_doubleword_divmod): Declare. * optabs.c (expand_doubleword_divmod): New function. (expand_binop): Use it. * internal-fn.c (expand_DIVMOD): Likewise. * gcc.target/i386/pr97282.c (foo): Use 123456 divisor instead of 10. * gcc.dg/pr97459-1.c (TESTS): Add tests for 10, 12 and 6144. * gcc.dg/pr97459-2.c (TESTS): Likewise. * gcc.dg/pr97459-3.c: New test. * gcc.dg/pr97459-4.c: New test. * gcc.dg/pr97459-5.c: New test. * gcc.dg/pr97459-6.c: New test.
author: Jakub Jelinek <jakub@redhat.com> 2020-12-02 11:32:19 +0100
committer: Jakub Jelinek <jakub@redhat.com> 2020-12-02 11:40:47 +0100
commit: 037fe26ee1ac18581bf0ad646d48591c97d10bd3 (patch)
tree: a6f69e57d3e6391e15795c1382babe8437afd09b /gcc/optabs.c
parent: 337d6362458ab033d3bfe287dda37f9da5577406 (diff)
download: gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.zip
gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.tar.gz
gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.tar.bz2
1 files changed, 120 insertions, 12 deletions
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 3b116d3..41daa48 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -1118,6 +1118,99 @@ expand_doubleword_mod (machine_mode mode, rtx op0, rtx op1, bool unsignedp)
     }
   return NULL_RTX;
 }
+
+/* Similarly to the above function, but compute both quotient and remainder.
+   Quotient can be computed from the remainder as:
+   rem = op0 % op1;  // Handled using expand_doubleword_mod
+   quot = (op0 - rem) * inv; // inv is multiplicative inverse of op1 modulo
+			     // 2 * BITS_PER_WORD
+
+   We can also handle cases where op1 is a multiple of power of two constant
+   and constant handled by expand_doubleword_mod.
+   op11 = 1 << __builtin_ctz (op1);
+   op12 = op1 / op11;
+   rem1 = op0 % op12;  // Handled using expand_doubleword_mod
+   quot1 = (op0 - rem1) * inv; // inv is multiplicative inverse of op12 modulo
+			       // 2 * BITS_PER_WORD
+   rem = (quot1 % op11) * op12 + rem1;
+   quot = quot1 / op11;  */
+
+rtx
+expand_doubleword_divmod (machine_mode mode, rtx op0, rtx op1, rtx *rem,
+			  bool unsignedp)
+{
+  *rem = NULL_RTX;
+
+  /* Negative dividend should have been optimized into positive,
+     similarly modulo by 1 and modulo by power of two is optimized
+     differently too.  */
+  if (INTVAL (op1) <= 1 || pow2p_hwi (INTVAL (op1)))
+    return NULL_RTX;
+
+  rtx op11 = const1_rtx;
+  rtx op12 = op1;
+  if ((INTVAL (op1) & 1) == 0)
+    {
+      int bit = ctz_hwi (INTVAL (op1));
+      op11 = GEN_INT (HOST_WIDE_INT_1 << bit);
+      op12 = GEN_INT (INTVAL (op1) >> bit);
+    }
+
+  rtx rem1 = expand_doubleword_mod (mode, op0, op12, unsignedp);
+  if (rem1 == NULL_RTX)
+    return NULL_RTX;
+
+  int prec = 2 * BITS_PER_WORD;
+  wide_int a = wide_int::from (INTVAL (op12), prec + 1, UNSIGNED);
+  wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+  wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
+  rtx inv = immed_wide_int_const (m, mode);
+
+  rtx_insn *last = get_last_insn ();
+  rtx quot1 = expand_simple_binop (mode, MINUS, op0, rem1,
+				   NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  quot1 = expand_simple_binop (mode, MULT, quot1, inv,
+			       NULL_RTX, unsignedp, OPTAB_DIRECT);
+  if (quot1 == NULL_RTX)
+    return NULL_RTX;
+
+  if (op11 != const1_rtx)
+    {
+      rtx rem2 = expand_divmod (1, TRUNC_MOD_EXPR, mode, quot1, op11,
+				NULL_RTX, unsignedp);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, MULT, rem2, op12, NULL_RTX,
+				  unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem2 = expand_simple_binop (mode, PLUS, rem2, rem1, NULL_RTX,
+				  unsignedp, OPTAB_DIRECT);
+      if (rem2 == NULL_RTX)
+	return NULL_RTX;
+
+      rtx quot2 = expand_divmod (0, TRUNC_DIV_EXPR, mode, quot1, op11,
+				 NULL_RTX, unsignedp);
+      if (quot2 == NULL_RTX)
+	return NULL_RTX;
+
+      rem1 = rem2;
+      quot1 = quot2;
+    }
+
+  /* Punt if we need any library calls.  */
+  for (; last; last = NEXT_INSN (last))
+    if (CALL_P (last))
+      return NULL_RTX;
+
+  *rem = rem1;
+  return quot1;
+}
 
 /* Wrapper around expand_binop which takes an rtx code to specify
    the operation to perform, not an optab pointer.  All other
@@ -1999,7 +2092,10 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
     }
 
   /* Attempt to synthetize double word modulo by constant divisor.  */
-  if ((binoptab == umod_optab || binoptab == smod_optab)
+  if ((binoptab == umod_optab
+       || binoptab == smod_optab
+       || binoptab == udiv_optab
+       || binoptab == sdiv_optab)
       && optimize
       && CONST_INT_P (op1)
       && is_int_mode (mode, &int_mode)
@@ -2008,21 +2104,33 @@ expand_binop (machine_mode mode, optab binoptab, rtx op0, rtx op1,
       && optab_handler (add_optab, word_mode) != CODE_FOR_nothing
       && optimize_insn_for_speed_p ())
     {
-      rtx remainder = expand_doubleword_mod (int_mode, op0, op1,
-					     binoptab == umod_optab);
-      if (remainder != NULL_RTX)
+      rtx res = NULL_RTX;
+      if ((binoptab == umod_optab || binoptab == smod_optab)
+	  && (INTVAL (op1) & 1) == 0)
+	res = expand_doubleword_mod (int_mode, op0, op1,
+				     binoptab == umod_optab);
+      else
+	{
+	  rtx quot = expand_doubleword_divmod (int_mode, op0, op1, &res,
+					       binoptab == umod_optab
+					       || binoptab == udiv_optab);
+	  if (quot == NULL_RTX)
+	    res = NULL_RTX;
+	  else if (binoptab == udiv_optab || binoptab == sdiv_optab)
+	    res = quot;
+	}
+      if (res != NULL_RTX)
 	{
 	  if (optab_handler (mov_optab, int_mode) != CODE_FOR_nothing)
 	    {
-	      rtx_insn *move = emit_move_insn (target ? target : remainder,
-					       remainder);
-	      set_dst_reg_note (move,
-				REG_EQUAL,
-				gen_rtx_fmt_ee (UMOD, int_mode,
-						copy_rtx (op0), op1),
-				target ? target : remainder);
+	      rtx_insn *move = emit_move_insn (target ? target : res,
+					       res);
+	      set_dst_reg_note (move, REG_EQUAL,
+				gen_rtx_fmt_ee (optab_to_code (binoptab),
+						int_mode, copy_rtx (op0), op1),
+				target ? target : res);
 	    }
-	  return remainder;
+	  return res;
 	}
       else
 	delete_insns_since (last);
author	Jakub Jelinek <jakub@redhat.com>	2020-12-02 11:32:19 +0100
committer	Jakub Jelinek <jakub@redhat.com>	2020-12-02 11:40:47 +0100
commit	037fe26ee1ac18581bf0ad646d48591c97d10bd3 (patch)
tree	a6f69e57d3e6391e15795c1382babe8437afd09b /gcc/optabs.c
parent	337d6362458ab033d3bfe287dda37f9da5577406 (diff)
download	gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.zip gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.tar.gz gcc-037fe26ee1ac18581bf0ad646d48591c97d10bd3.tar.bz2