aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author	Jakub Jelinek <jakub@redhat.com>	2020-12-10 12:03:30 +0100
committer	Jakub Jelinek <jakub@redhat.com>	2020-12-10 12:03:30 +0100
commit	680e4202f23ce74f3b26c7f090b9d22a56765554 (patch)
tree	e246cb295db95cea8703e7efe822b6241a81ba82
parent	66dea8899df6475d5cb289491dbbff307c16c1a7 (diff)
download	gcc-680e4202f23ce74f3b26c7f090b9d22a56765554.zip
	gcc-680e4202f23ce74f3b26c7f090b9d22a56765554.tar.gz
	gcc-680e4202f23ce74f3b26c7f090b9d22a56765554.tar.bz2
dojump: Improve float != comparisons on x86 [PR98212]
The x86 backend doesn't have EQ or NE floating point comparisons, so splits
x != y into x unord y || x <> y.  The problem with that is that unord
comparison doesn't trap on qNaN operands but LTGT does.  The end effect is
that it doesn't trap on qNaN operands, because x unord y will be true for
those and so LTGT will not be performed, but as the backend is currently
unable to merge signalling and non-signalling comparisons (and after all,
with this exact exception it shouldn't unless the first one is signalling
and the second one is non-signalling) it means we end up with:
	ucomiss	%xmm1, %xmm0
	jp	.L4
	comiss	%xmm1, %xmm0
	jne	.L4
	ret
	.p2align 4,,10
	.p2align 3
.L4:
	xorl	%eax, %eax
	jmp	foo
where the comiss is the signalling comparison, but we already know that
the right flags bits are already computed by the ucomiss insn.
The following patch, if target supports UNEQ comparisons, splits NE as
x unord y || !(x uneq y) instead, which in the end means we end up with just:
	ucomiss	%xmm1, %xmm0
	jp	.L4
	jne	.L4
	ret
	.p2align 4,,10
	.p2align 3
.L4:
	jmp	foo
because UNEQ is like UNORDERED non-signalling.

2020-12-10  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/98212
	* dojump.c (do_compare_rtx_and_jump): When splitting NE and backend
	can do UNEQ, prefer splitting x != y into x unord y || !(x uneq y)
	instead of into x unord y || x ltgt y.

	* gcc.target/i386/pr98212.c: New test.
-rw-r--r--	gcc/dojump.c	| 17
-rw-r--r--	gcc/testsuite/gcc.target/i386/pr98212.c	| 21
2 files changed, 38 insertions(+), 0 deletions(-)
diff --git a/gcc/dojump.c b/gcc/dojump.c
index 4c7fafe..b12bcea 100644
--- a/gcc/dojump.c
+++ b/gcc/dojump.c
@@ -1168,6 +1168,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
profile_probability first_prob = prob.split (cprob);
do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
size, NULL, if_true_label, first_prob);
+ if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
+ {
+ /* x != y can be split into x unord y || x ltgt y
+ or x unord y || !(x uneq y). The latter has the
+ advantage that both comparisons are non-signalling and
+ so there is a higher chance that the RTL optimizations
+ merge the two comparisons into just one. */
+ code = UNEQ;
+ prob = prob.invert ();
+ if (! if_false_label)
+ {
+ if (! dummy_label)
+ dummy_label = gen_label_rtx ();
+ if_false_label = dummy_label;
+ }
+ std::swap (if_false_label, if_true_label);
+ }
}
}
}
diff --git a/gcc/testsuite/gcc.target/i386/pr98212.c b/gcc/testsuite/gcc.target/i386/pr98212.c
new file mode 100644
index 0000000..b8ed023
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr98212.c
@@ -0,0 +1,21 @@
+/* PR rtl-optimization/98212 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mfpmath=sse -mno-avx" } */
+/* { dg-final { scan-assembler-times "\tucomiss\t" 2 } } */
+/* { dg-final { scan-assembler-not "\tcomiss\t" } } */
+
+void foo (void);
+
+void
+bar (float a, float b)
+{
+ if (a != b)
+ foo ();
+}
+
+void
+baz (float a, float b)
+{
+ if (a == b)
+ foo ();
+}