aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Biener <rguenther@suse.de>2023-07-13 08:58:58 +0200
committerRichard Biener <rguenther@suse.de>2023-07-21 09:35:29 +0200
commit9f8f37f5490076b10436993fb90d18092a960922 (patch)
treeb245873ed13d368336da592755a0833451c8bf0c
parent6d449531a60b56ed0f4aeb640aa9e46e4ec35208 (diff)
downloadgcc-9f8f37f5490076b10436993fb90d18092a960922.zip
gcc-9f8f37f5490076b10436993fb90d18092a960922.tar.gz
gcc-9f8f37f5490076b10436993fb90d18092a960922.tar.bz2
tree-optimization/88540 - FP x > y ? x : y if-conversion without -ffast-math
The following makes sure that FP x > y ? x : y style max/min operations are if-converted at the GIMPLE level. While we can neither match it to MAX_EXPR nor .FMAX, as both have different IEEE semantics than the ternary ?: operation, we can make sure to maintain this form as a COND_EXPR so backends have the chance to match this to instructions their ISA offers. The patch does this in phiopt where we recognize min/max and instead of giving up when we have to honor NaNs we alter the generated code to a COND_EXPR. This resolves PR88540 and we can then SLP vectorize the min operation for its testcase. It also resolves part of the regressions observed with the change matching bit-inserts of bit-field-refs to vec_perm. Expansion from a COND_EXPR rather than from compare-and-branch regresses gcc.target/i386/pr54855-9.c by producing extra moves: while the corresponding min/max operations are now already synthesized by RTL expansion, register selection isn't optimal. This can also be provoked without this change by altering the operand order in the source. I have XFAILed that part of the test. PR tree-optimization/88540 * tree-ssa-phiopt.cc (minmax_replacement): Do not give up with NaNs but handle the simple case by if-converting to a COND_EXPR. * gcc.target/i386/pr88540.c: New testcase. * gcc.target/i386/pr54855-9.c: XFAIL check for redundant moves. * gcc.target/i386/pr54855-12.c: Adjust. * gcc.target/i386/pr54855-13.c: Likewise. * gcc.target/i386/pr110170.c: Likewise. * gcc.dg/tree-ssa/split-path-12.c: Likewise.
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/pr110170.c7
-rw-r--r--gcc/testsuite/gcc.target/i386/pr54855-12.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/pr54855-13.c2
-rw-r--r--gcc/testsuite/gcc.target/i386/pr54855-9.c4
-rw-r--r--gcc/testsuite/gcc.target/i386/pr88540.c10
-rw-r--r--gcc/tree-ssa-phiopt.cc21
7 files changed, 35 insertions, 15 deletions
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
index 19a130d..da00f79 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c
@@ -16,4 +16,6 @@ foo(double *d1, double *d2, double *d3, int num, double *ip)
return dmax[0] + dmax[1] + dmax[2];
}
-/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */
+/* Split-paths shouldn't do anything here, if there's a diamond it would
+ be if-convertible. */
+/* { dg-final { scan-tree-dump-not "Duplicating join block" "split-paths" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
index c72f733..9edbf05 100644
--- a/gcc/testsuite/gcc.target/i386/pr110170.c
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -1,10 +1,7 @@
/* { dg-do compile } */
/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
-/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
-/* Ideally cond_swap_df is also optimized to minsd/maxsd. */
-/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 } } */
void __cond_swap_df(double* __x, double* __y) {
_Bool __r = (*__x < *__y);
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-12.c b/gcc/testsuite/gcc.target/i386/pr54855-12.c
index 2f8af39..09e8ab8 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-12.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-12.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-13.c b/gcc/testsuite/gcc.target/i386/pr54855-13.c
index 87b4f45..a4f2506 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-13.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-13.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512fp16" } */
-/* { dg-final { scan-assembler-times "vmaxsh\[ \\t\]" 1 } } */
+/* { dg-final { scan-assembler-times "vm\[ai\]\[nx\]sh\[ \\t\]" 1 } } */
/* { dg-final { scan-assembler-not "vcomish\[ \\t\]" } } */
/* { dg-final { scan-assembler-not "vmovsh\[ \\t\]" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr54855-9.c b/gcc/testsuite/gcc.target/i386/pr54855-9.c
index 40add5f..fe9302e 100644
--- a/gcc/testsuite/gcc.target/i386/pr54855-9.c
+++ b/gcc/testsuite/gcc.target/i386/pr54855-9.c
@@ -1,8 +1,8 @@
/* { dg-do compile } */
/* { dg-options "-O2 -msse2 -mfpmath=sse" } */
/* { dg-final { scan-assembler-times "minss" 1 } } */
-/* { dg-final { scan-assembler-not "movaps" } } */
-/* { dg-final { scan-assembler-not "movss" } } */
+/* { dg-final { scan-assembler-not "movaps" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not "movss" { xfail *-*-* } } } */
typedef float vec __attribute__((vector_size(16)));
diff --git a/gcc/testsuite/gcc.target/i386/pr88540.c b/gcc/testsuite/gcc.target/i386/pr88540.c
new file mode 100644
index 0000000..b927d0c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr88540.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2" } */
+
+void test(double* __restrict d1, double* __restrict d2, double* __restrict d3)
+{
+ for (int n = 0; n < 2; ++n)
+ d3[n] = d1[n] < d2[n] ? d1[n] : d2[n];
+}
+
+/* { dg-final { scan-assembler "minpd" } } */
diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 9d542fd..cb4e2da 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -1625,10 +1625,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
tree type = TREE_TYPE (PHI_RESULT (phi));
- /* The optimization may be unsafe due to NaNs. */
- if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
- return false;
-
gcond *cond = as_a <gcond *> (*gsi_last_bb (cond_bb));
enum tree_code cmp = gimple_cond_code (cond);
tree rhs = gimple_cond_rhs (cond);
@@ -1815,6 +1811,9 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
else
return false;
}
+ else if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ /* The optimization may be unsafe due to NaNs. */
+ return false;
else if (middle_bb != alt_middle_bb && threeway_p)
{
/* Recognize the following case:
@@ -2148,7 +2147,19 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb, basic_block alt_
/* Emit the statement to compute min/max. */
gimple_seq stmts = NULL;
tree phi_result = PHI_RESULT (phi);
- result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
+
+ /* When we can't use a MIN/MAX_EXPR still make sure the expression
+ stays in a form to be recognized by ISA that map to IEEE x > y ? x : y
+ semantics (that's not IEEE max semantics). */
+ if (HONOR_NANS (type) || HONOR_SIGNED_ZEROS (type))
+ {
+ result = gimple_build (&stmts, cmp, boolean_type_node,
+ gimple_cond_lhs (cond), rhs);
+ result = gimple_build (&stmts, COND_EXPR, TREE_TYPE (phi_result),
+ result, arg_true, arg_false);
+ }
+ else
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
gsi = gsi_last_bb (cond_bb);
gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);