Add pre_reload splitter to detect fp min/max pattern.

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For the testcase in the PR, there's an extra move from cmp_op0 to if_true, so ix86_expand_sse_fp_minmax fails to match. This patch adds a pre_reload splitter to detect the min/max pattern. Operand order in MINSS matters for signed zeros and NaNs, since the instruction always returns the second operand when any operand is a NaN or both operands are zero. gcc/ChangeLog: PR target/110170 * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload splitter to detect fp max pattern. (*ieee_min<mode>3_1): Ditto, but for fp min pattern. gcc/testsuite/ChangeLog: * g++.target/i386/pr110170.C: New test. * gcc.target/i386/pr110170.c: New test.
author: liuhongt <hongtao.liu@intel.com> 2023-07-03 18:19:19 +0800
committer: liuhongt <hongtao.liu@intel.com> 2023-07-10 09:06:24 +0800
commit: d41a57c46df6f8f7dae0c0a8b349e734806a837b (patch)
tree: ed9ee22f29d6bef05e99bc4e64a63a820132d982 /gcc
parent: 361a6fc4bc5d3073e8e19fba0af51380720e677a (diff)
download: gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.zip
gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.gz
gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.bz2
3 files changed, 154 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e47ced1..621cdd9 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,49 @@
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+(define_insn_and_split "*ieee_max<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[3])
+      && rtx_equal_p (operands[2], operands[4]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MAX))])
+
+(define_insn_and_split "*ieee_min<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand")
+	   (match_operand:MODEF 2 "register_operand")
+	   (lt:MODEF
+	     (match_operand:MODEF 3 "register_operand")
+	     (match_operand:MODEF 4 "register_operand"))]
+	  UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+  && (rtx_equal_p (operands[1], operands[4])
+      && rtx_equal_p (operands[2], operands[3]))
+  && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:MODEF
+	  [(match_dup 2)
+	   (match_dup 1)]
+	 UNSPEC_IEEE_MIN))])
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 0000000..e638b12
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,90 @@
+/* { dg-do run { target sse4 } } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+void
+__attribute__((noinline))  /* NOTE(review): presumably keeps the select pattern out of the constant-folded callers -- confirm.  */
+__cond_swap(double* __x, double* __y) {  /* Keeps *__x and *__y as they are when *__x < *__y, otherwise exchanges them.  */
+  bool __r = (*__x < *__y);  /* False when either value is a NaN, and for -0.0 vs 0.0.  */
+  auto __tmp = __r ? *__x : *__y;  /* Value that ends up in *__x.  */
+  *__y = __r ? *__y : *__x;  /* Same condition, opposite selection.  */
+  *__x = __tmp;
+}
+
+auto test1() {
+    double nan = -0.0;
+    double x = 0.0;
+    __cond_swap(&nan, &x);
+    return x == -0.0 && nan == 0.0;
+}
+
+auto test1r() {
+    double nan = NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+    double nan = NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+    double nan = -NAN;
+    double x = 1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&nan, &x);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+    double nan = -NAN;
+    double x = -1.0;
+    __cond_swap(&x, &nan);
+    return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+static void
+TEST()  /* Aborts unless every testN/testNr case returns true.  */
+{
+  if (
+      !test1() || !test1r()
+      || !test2() || !test2r()
+      || !test3() || !test3r()
+      || !test4() || !test4r()
+      ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 0000000..c72f733
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
author	liuhongt <hongtao.liu@intel.com>	2023-07-03 18:19:19 +0800
committer	liuhongt <hongtao.liu@intel.com>	2023-07-10 09:06:24 +0800
commit	d41a57c46df6f8f7dae0c0a8b349e734806a837b (patch)
tree	ed9ee22f29d6bef05e99bc4e64a63a820132d982 /gcc
parent	361a6fc4bc5d3073e8e19fba0af51380720e677a (diff)
download	gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.zip gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.gz gcc-d41a57c46df6f8f7dae0c0a8b349e734806a837b.tar.bz2