ssa-math-opts, i386: Handle most unordered values rather than just 2 [PR116896]

On Mon, Oct 07, 2024 at 10:32:57AM +0200, Richard Biener wrote: > > They are implementation defined, -1, 0, 1, 2 is defined by libstdc++: > > using type = signed char; > > enum class _Ord : type { equivalent = 0, less = -1, greater = 1 }; > > enum class _Ncmp : type { _Unordered = 2 }; > > https://eel.is/c++draft/cmp#categories.pre-1 documents them as > > enum class ord { equal = 0, equivalent = equal, less = -1, greater = 1 }; // exposition only > > enum class ncmp { unordered = -127 }; // exposition only > > and now looking at it, LLVM's libc++ takes that literally and uses > > -1, 0, 1, -127. One can't use <=> operator without including <compare> > > which provides the enums, so I think if all we care about is libstdc++, > > then just hardcoding -1, 0, 1, 2 is fine, if we want to also optimize > > libc++ when used with gcc, we could support -1, 0, 1, -127 as another > > option. > > Supporting arbitrary 4 values doesn't make sense, at least on x86 the > > only reason to do the conversion to int in an optab is a good sequence > > to turn the flag comparisons to -1, 0, 1. So, either we do nothing > > more than the patch, or add handle both 2 and -127 for unordered, > > or add support for arbitrary value for the unordered case except > > -1, 0, 1 (then -1 could mean signed int, 1 unsigned int, 0 do the jumps > > and any other value what should be returned for unordered. Here is an incremental patch which adds support for (almost) arbitrary unordered constant value. It changes the .SPACESHIP and spaceship<mode>4 optab conventions, so 0 means use branches, floating point, -1, 0, 1, 2 results consumed by tree-ssa-math-opts.cc emitted comparisons, -1 means signed int comparisons, -1, 0, 1 results, 1 means unsigned int comparisons, -1, 0, 1 results, and for constant other than -1, 0, 1 which fit into [-128, 127] converted to the PHI type are otherwise specified as the last argument (then it is -1, 0, 1, C results). 2024-10-08 Jakub Jelinek <jakub@redhat.com> PR middle-end/116896 * tree-ssa-math-opts.cc (optimize_spaceship): Handle unordered values other than 2, but they still need to be signed char range possibly converted to the PHI result and can't be in [-1, 1] range. Use last .SPACESHIP argument of 1 for unsigned int comparisons, -1 for signed int, 0 for floating point branches and any other for floating point with that value as unordered. * config/i386/i386-expand.cc (ix86_expand_fp_spaceship): Use op2 rather const2_rtx if op2 is not const0_rtx for unordered result. (ix86_expand_int_spaceship): Change INTVAL (op2) == 1 tests to INTVAL (op2) != -1. * doc/md.texi (spaceship@var{m}4): Document the above changes. * gcc.target/i386/pr116896.c: New test.
author: Jakub Jelinek <jakub@redhat.com> 2024-10-08 10:40:29 +0200
committer: Jakub Jelinek <jakub@gcc.gnu.org> 2024-10-08 10:44:30 +0200
commit: ff889b35935d5e796cf308fb2368d4e319c60ece (patch)
tree: 92879f260210ccd47eb3a6322c46f59b92fb99b4
parent: 9fd38cc5d636124f0611aa5d26ac4258431f164a (diff)
download: gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.zip
gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.tar.gz
gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.tar.bz2
4 files changed, 100 insertions, 16 deletions
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 81dd506..3284011 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -3234,7 +3234,7 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
   if (l2)
     {
       emit_label (l2);
-      emit_move_insn (dest, const2_rtx);
+      emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2);
     }
   emit_label (lend);
 }
@@ -3250,11 +3250,11 @@ ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
      operands nor optimize CC mode - we need a mode usable for both
      LT and GT resp. LTU and GTU comparisons with the same unswapped
      operands.  */
-  rtx flags = gen_rtx_REG (INTVAL (op2) == 1 ? CCGCmode : CCmode, FLAGS_REG);
+  rtx flags = gen_rtx_REG (INTVAL (op2) != 1 ? CCGCmode : CCmode, FLAGS_REG);
   rtx tmp = gen_rtx_COMPARE (GET_MODE (flags), op0, op1);
   emit_insn (gen_rtx_SET (flags, tmp));
   rtx lt_tmp = gen_reg_rtx (QImode);
-  ix86_expand_setcc (lt_tmp, INTVAL (op2) == 1 ? LT : LTU, flags,
+  ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags,
 		     const0_rtx);
   if (GET_MODE (dest) != QImode)
     {
@@ -3264,7 +3264,7 @@ ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
       lt_tmp = tmp;
     }
   rtx gt_tmp = gen_reg_rtx (QImode);
-  ix86_expand_setcc (gt_tmp, INTVAL (op2) == 1 ? GT : GTU, flags,
+  ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags,
 		     const0_rtx);
   if (GET_MODE (dest) != QImode)
     {
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c58072e..603f74a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -8575,8 +8575,11 @@ if operand 1 with mode @var{m} compares less than operand 2, equal to
 operand 2, greater than operand 2 or is unordered with operand 2.
 Operand 3 should be @code{const0_rtx} if the result is used in comparisons,
 @code{const1_rtx} if the result is used as integer value and the comparison
-is signed, @code{const2_rtx} if the result is used as integer value and
-the comparison is unsigned.
+is integral unsigned, @code{constm1_rtx} if the result is used as integer
+value and the comparison is integral signed and some other @code{CONST_INT}
+if the result is used as integer value and the comparison is floating point.
+In the last case, instead of setting output operand 0 to 2 for unordered,
+set it to operand 3.
 @var{m} should be a scalar floating point mode.
 
 This pattern is not allowed to @code{FAIL}.
diff --git a/gcc/testsuite/gcc.target/i386/pr116896.c b/gcc/testsuite/gcc.target/i386/pr116896.c
new file mode 100644
index 0000000..9d1bd88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116896.c
@@ -0,0 +1,59 @@
+/* PR middle-end/116896 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -masm=att -fno-stack-protector" } */
+/* { dg-final { scan-assembler-times "\tjp\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tj\[^mp\]\[a-z\]*\t" } } */
+/* { dg-final { scan-assembler-times "\tsbb\[bl\]\t\\\$0, " 4 } } */
+/* { dg-final { scan-assembler-times "\tseta\t" 4 } } */
+
+signed char
+foo (float x, float y)
+{
+  if (x == y)
+    return 0;
+  else if (x < y)
+    return -1;
+  else if (x > y)
+    return 1;
+  else
+    return 2;
+}
+
+__attribute__((optimize ("fast-math"))) signed char
+bar (float x, float y)
+{
+  if (x == y)
+    return 0;
+  else if (x < y)
+    return -1;
+  else if (x > y)
+    return 1;
+  else
+    return 2;
+}
+
+signed char
+baz (float x, float y)
+{
+  if (x == y)
+    return 0;
+  else if (x < y)
+    return -1;
+  else if (x > y)
+    return 1;
+  else
+    return -127;
+}
+
+__attribute__((optimize ("fast-math"))) signed char
+qux (float x, float y)
+{
+  if (x == y)
+    return 0;
+  else if (x < y)
+    return -1;
+  else if (x > y)
+    return 1;
+  else
+    return -127;
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 37a3faa..4584d8c 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -6023,11 +6023,12 @@ optimize_spaceship (gcond *stmt)
 
   /* Check if there is a single bb into which all failed conditions
      jump to (perhaps through an empty block) and if it results in
-     a single integral PHI which just sets it to -1, 0, 1, 2
+     a single integral PHI which just sets it to -1, 0, 1, X
      (or -1, 0, 1 when NaNs can't happen).  In that case use 1 rather
      than 0 as last .SPACESHIP argument to tell backends it might
      consider different code generation and just cast the result
-     of .SPACESHIP to the PHI result.  */
+     of .SPACESHIP to the PHI result.  X above is some value
+     other than -1, 0, 1, for libstdc++ 2, for libc++ -127.  */
   tree arg3 = integer_zero_node;
   edge e = EDGE_SUCC (bb0, 0);
   if (e->dest == bb1)
@@ -6054,6 +6055,7 @@ optimize_spaceship (gcond *stmt)
       && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
       && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
     {
+      HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1;
       for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
 	{
 	  edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
@@ -6071,16 +6073,29 @@ optimize_spaceship (gcond *stmt)
 	  tree a = gimple_phi_arg_def_from_edge (phi, e3);
 	  if (TREE_CODE (a) != INTEGER_CST
 	      || (i == 0 && !integer_onep (a))
-	      || (i == 1 && !integer_all_onesp (a))
-	      || (i == 2 && wi::to_widest (a) != 2))
+	      || (i == 1 && !integer_all_onesp (a)))
 	    {
 	      phi = NULL;
 	      break;
 	    }
+	  if (i == 2)
+	    {
+	      tree minv = TYPE_MIN_VALUE (signed_char_type_node);
+	      tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
+	      widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
+	      if ((w >= -1 && w <= 1)
+		  || w < wi::to_widest (minv)
+		  || w > wi::to_widest (maxv))
+		{
+		  phi = NULL;
+		  break;
+		}
+	      argval = w.to_shwi ();
+	    }
 	}
       if (phi)
 	arg3 = build_int_cst (integer_type_node,
-			      TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 2 : 1);
+			      TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);
     }
 
   /* For integral <=> comparisons only use .SPACESHIP if it is turned
@@ -6094,11 +6109,18 @@ optimize_spaceship (gcond *stmt)
   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
   gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
 
-  wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
-  wide_int w2 = (HONOR_NANS (TREE_TYPE (arg1))
-		 ? wi::two (TYPE_PRECISION (integer_type_node))
-		 : wi::one (TYPE_PRECISION (integer_type_node)));
-  int_range<1> vr (TREE_TYPE (lhs), wm1, w2);
+  wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
+  wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
+  if (HONOR_NANS (TREE_TYPE (arg1)))
+    {
+      if (arg3 == integer_zero_node)
+	wmax = wi::two (TYPE_PRECISION (integer_type_node));
+      else if (tree_int_cst_sgn (arg3) < 0)
+	wmin = wi::to_wide (arg3);
+      else
+	wmax = wi::to_wide (arg3);
+    }
+  int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
   set_range_info (lhs, vr);
 
   if (arg3 != integer_zero_node)
author	Jakub Jelinek <jakub@redhat.com>	2024-10-08 10:40:29 +0200
committer	Jakub Jelinek <jakub@gcc.gnu.org>	2024-10-08 10:44:30 +0200
commit	ff889b35935d5e796cf308fb2368d4e319c60ece (patch)
tree	92879f260210ccd47eb3a6322c46f59b92fb99b4
parent	9fd38cc5d636124f0611aa5d26ac4258431f164a (diff)
download	gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.zip gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.tar.gz gcc-ff889b35935d5e796cf308fb2368d4e319c60ece.tar.bz2