aarch64: Fix subs_compare_2.c regression [PR100874]

subs_compare_2.c tests that we can use a SUBS+CSEL sequence for: unsigned int foo (unsigned int a, unsigned int b) { unsigned int x = a - 4; if (a < 4) return x; else return 0; } As Andrew notes in the PR, this is effectively MIN (a, 4) - 4, and it is now recognised as such by phiopt. Previously it was if-converted in RTL instead. I tried to look for ways to generalise this to other situations and to other ?:-style operations, not just max and min. However, for general ?: we tend to push an outer “- CST” into the arms of the ?: -- at least if one of them simplifies -- so I didn't find any useful abstraction. This patch therefore adds a pattern specifically for max/min(a,cst)-cst. I'm not thrilled at having to do this, but it seems like the least worst fix in the circumstances. Also, max(a,cst)-cst for unsigned a is a useful saturating subtraction idiom and so is arguably worth its own code for that reason. gcc/ PR target/100874 * config/aarch64/aarch64-protos.h (aarch64_maxmin_plus_const): Declare. * config/aarch64/aarch64.cc (aarch64_maxmin_plus_const): New function. * config/aarch64/aarch64.md (*aarch64_minmax_plus): New pattern. gcc/testsuite/ * gcc.target/aarch64/max_plus_1.c: New test. * gcc.target/aarch64/max_plus_2.c: Likewise. * gcc.target/aarch64/max_plus_3.c: Likewise. * gcc.target/aarch64/max_plus_4.c: Likewise. * gcc.target/aarch64/max_plus_5.c: Likewise. * gcc.target/aarch64/max_plus_6.c: Likewise. * gcc.target/aarch64/max_plus_7.c: Likewise. * gcc.target/aarch64/min_plus_1.c: Likewise. * gcc.target/aarch64/min_plus_2.c: Likewise. * gcc.target/aarch64/min_plus_3.c: Likewise. * gcc.target/aarch64/min_plus_4.c: Likewise. * gcc.target/aarch64/min_plus_5.c: Likewise. * gcc.target/aarch64/min_plus_6.c: Likewise. * gcc.target/aarch64/min_plus_7.c: Likewise.
author: Richard Sandiford <richard.sandiford@arm.com> 2022-02-15 18:09:35 +0000
committer: Richard Sandiford <richard.sandiford@arm.com> 2022-02-15 18:09:35 +0000
commit: 8e84b2b37a541b27feea69769fc314d534464ebd (patch)
tree: 9a14883947209d80f42453ac8c91ccfdce5090be /gcc/config
parent: 65df3aefaa6c0735bd46ffdc7b5018a8b4020ab8 (diff)
download: gcc-8e84b2b37a541b27feea69769fc314d534464ebd.zip
gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.gz
gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.bz2
3 files changed, 132 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 392efa0..d0e78d6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -939,6 +939,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
 				   aarch64_addr_query_type = ADDR_QUERY_M);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
 rtx aarch64_load_tp (rtx);
 
 void aarch64_expand_compare_and_swap (rtx op[]);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1a460d4..37ed22bc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3781,6 +3781,110 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
   return aarch64_gen_compare_reg (code, x, y);
 }
 
+/* Consider the operation:
+
+     OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3]
+
+   where:
+
+   - CODE is [SU]MAX or [SU]MIN
+   - OPERANDS[2] and OPERANDS[3] are constant integers
+   - OPERANDS[3] is a positive or negative shifted 12-bit immediate
+   - all operands have mode MODE, which is taken from OPERANDS[0]
+
+   Decide whether it is possible to implement the operation using:
+
+     SUBS <tmp>, OPERANDS[1], -OPERANDS[3]
+     or
+     ADDS <tmp>, OPERANDS[1], OPERANDS[3]
+
+   followed by:
+
+     <insn> OPERANDS[0], <tmp>, [wx]zr, <cond>
+
+   where <insn> is one of CSEL, CSINV or CSINC.  Return true if so.
+   OPERANDS is only read unless GENERATE_P is true, in which case also set:
+
+     OPERANDS[4] = -OPERANDS[3]
+     OPERANDS[5] = the rtl condition representing <cond>
+     OPERANDS[6] = <tmp> (a new pseudo if possible, otherwise OPERANDS[0])
+     OPERANDS[7] = 0 for CSEL, -1 for CSINV or 1 for CSINC.  */
+bool
+aarch64_maxmin_plus_const (rtx_code code, rtx *operands, bool generate_p)
+{
+  signop sgn = (code == UMAX || code == UMIN ? UNSIGNED : SIGNED);
+  rtx dst = operands[0];
+  rtx maxmin_op = operands[2];
+  rtx add_op = operands[3];
+  machine_mode mode = GET_MODE (dst);
+
+  /* max (x, y) - z == (x >= y + 1 ? x : y) - z
+		    == (x >= y ? x : y) - z
+		    == (x > y ? x : y) - z
+		    == (x > y - 1 ? x : y) - z
+
+     min (x, y) - z == (x <= y - 1 ? x : y) - z
+		    == (x <= y ? x : y) - z
+		    == (x < y ? x : y) - z
+		    == (x < y + 1 ? x : y) - z
+
+     With x = OPERANDS[1], y = OPERANDS[2] and z = -OPERANDS[3], check whether
+     z is in { y - 1, y, y + 1 } and pick the form(s) for which x is compared
+     with z.  DIFF is y - z, which is also the value after the ":" below:
+
+     max (x, y) - z == x >= y + 1 ? x - (y + 1) : -1   [z == y + 1]
+		    == x >= y ? x - y : 0              [z == y]
+		    == x > y ? x - y : 0               [z == y]
+		    == x > y - 1 ? x - (y - 1) : 1     [z == y - 1]
+
+     min (x, y) - z == x <= y - 1 ? x - (y - 1) : 1    [z == y - 1]
+		    == x <= y ? x - y : 0              [z == y]
+		    == x < y ? x - y : 0               [z == y]
+		    == x < y + 1 ? x - (y + 1) : -1    [z == y + 1].  */
+  auto maxmin_val = rtx_mode_t (maxmin_op, mode);  /* y */
+  auto add_val = rtx_mode_t (add_op, mode);
+  auto sub_val = wi::neg (add_val);  /* z */
+  auto diff = wi::sub (maxmin_val, sub_val);  /* y - z */
+  if (!(diff == 0
+	|| (diff == 1 && wi::gt_p (maxmin_val, sub_val, sgn))
+	|| (diff == -1 && wi::lt_p (maxmin_val, sub_val, sgn))))
+    return false;
+
+  if (!generate_p)
+    return true;
+
+  rtx_code cmp;
+  switch (code)
+    {
+    case SMAX:
+      cmp = diff == 1 ? GT : GE;
+      break;
+    case UMAX:
+      cmp = diff == 1 ? GTU : GEU;
+      break;
+    case SMIN:
+      cmp = diff == -1 ? LT : LE;
+      break;
+    case UMIN:
+      cmp = diff == -1 ? LTU : LEU;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  rtx cc = gen_rtx_REG (CCmode, CC_REGNUM);
+
+  operands[4] = immed_wide_int_const (sub_val, mode);  /* -OPERANDS[3] */
+  operands[5] = gen_rtx_fmt_ee (cmp, VOIDmode, cc, const0_rtx);
+  if (can_create_pseudo_p ())
+    operands[6] = gen_reg_rtx (mode);
+  else
+    operands[6] = dst;  /* reuse the destination as <tmp> */
+  operands[7] = immed_wide_int_const (diff, mode);  /* 0, 1 or -1 */
+
+  return true;
+}
+
+
 /* Build the SYMBOL_REF for __tls_get_addr.  */
 
 static GTY(()) rtx tls_get_addr_libfunc;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 3c72bda..64cc21d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4405,6 +4405,33 @@
   }
 )
 
+;; Implement MAX/MIN (A, B) - C, written here as a PLUS of operand 3 (-C),
+;; using SUBS/ADDS and CSEL/CSINV/CSINC; see aarch64_maxmin_plus_const.
+(define_insn_and_split "*aarch64_minmax_plus"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+	(plus:GPI
+	  (MAXMIN:GPI
+	    (match_operand:GPI 1 "register_operand" "r")
+	    (match_operand:GPI 2 "const_int_operand"))
+	  (match_operand:GPI 3 "aarch64_plus_immediate")))
+   (clobber (reg:CC CC_REGNUM))]
+  "aarch64_maxmin_plus_const (<CODE>, operands, false)"
+  "#"
+  "&& 1"
+  [(parallel
+     [(set (reg:CC CC_REGNUM)
+	   (compare:CC (match_dup 1) (match_dup 4)))
+      (set (match_dup 6)
+	   (plus:GPI (match_dup 1) (match_dup 3)))])
+   (set (match_dup 0)
+	(if_then_else:GPI (match_dup 5) (match_dup 6) (match_dup 7)))]
+  {
+    if (!aarch64_maxmin_plus_const (<CODE>, operands, true))
+      gcc_unreachable ();
+  }
+  [(set_attr "length" "8")]
+)
+
 ;; -------------------------------------------------------------------
 ;; Logical operations
 ;; -------------------------------------------------------------------
author	Richard Sandiford <richard.sandiford@arm.com>	2022-02-15 18:09:35 +0000
committer	Richard Sandiford <richard.sandiford@arm.com>	2022-02-15 18:09:35 +0000
commit	8e84b2b37a541b27feea69769fc314d534464ebd (patch)
tree	9a14883947209d80f42453ac8c91ccfdce5090be /gcc/config
parent	65df3aefaa6c0735bd46ffdc7b5018a8b4020ab8 (diff)
download	gcc-8e84b2b37a541b27feea69769fc314d534464ebd.zip gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.gz gcc-8e84b2b37a541b27feea69769fc314d534464ebd.tar.bz2