diff options
author | Jakub Jelinek <jakub@redhat.com> | 2022-01-17 13:39:05 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2022-01-17 13:39:05 +0100 |
commit | 463d9108766dcbb6a1051985e6c840a46897fe10 (patch) | |
tree | 5e83cb1ed249317106bf8059c1eb133fdf0cb84f /gcc/config | |
parent | 4152e4ad3f3a67ce30f5e0e01d5eba03fcff10b8 (diff) | |
download | gcc-463d9108766dcbb6a1051985e6c840a46897fe10.zip gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.gz gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.bz2 |
widening_mul, i386: Improve spaceship expansion on x86 [PR103973]
C++20:
#include <compare>
auto cmp4way(double a, double b)
{
return a <=> b;
}
expands to:
ucomisd %xmm1, %xmm0
jp .L8
movl $0, %eax
jne .L8
.L2:
ret
.p2align 4,,10
.p2align 3
.L8:
comisd %xmm0, %xmm1
movl $-1, %eax
ja .L2
ucomisd %xmm1, %xmm0
setbe %al
addl $1, %eax
ret
That is 3 comparisons of the same operands.
The following patch improves it to just one comparison:
comisd %xmm1, %xmm0
jp .L4
seta %al
movl $0, %edx
leal -1(%rax,%rax), %eax
cmove %edx, %eax
ret
.L4:
movl $2, %eax
ret
While a <=> b expands to a == b ? 0 : a < b ? -1 : a > b ? 1 : 2
where the first comparison is equality and this shouldn't raise
exceptions on qNaN operands, if the operands aren't equal (which
includes unordered cases), then it immediately performs < or >
comparison and that raises exceptions even on qNaNs, so we can just
perform a single comparison that raises exceptions on qNaN.
As the 4 different cases are encoded as
ZF CF PF
1 1 1 a unordered b
0 0 0 a > b
0 1 0 a < b
1 0 0 a == b
we can emit optimal sequence of comparions, first jp
for the unordered case, then je for the == case and finally jb
for the < case.
The patch pattern recognizes spaceship-like comparisons during
widening_mul if the spaceship optab is implemented, and replaces
those comparisons with comparisons of .SPACESHIP ifn which returns
-1/0/1/2 based on the comparison. This seems to work well both for the
case of just returning the -1/0/1/2 (when we have just a common
successor with a PHI) or when the different cases are handled with
various other basic blocks. The testcases cover both of those cases,
the latter with different function calls in those.
2022-01-17 Jakub Jelinek <jakub@redhat.com>
PR target/103973
* tree-cfg.h (cond_only_block_p): Declare.
* tree-ssa-phiopt.c (cond_only_block_p): Move function to ...
* tree-cfg.c (cond_only_block_p): ... here. No longer static.
* optabs.def (spaceship_optab): New optab.
* internal-fn.def (SPACESHIP): New internal function.
* internal-fn.h (expand_SPACESHIP): Declare.
* internal-fn.c (expand_PHI): Formatting fix.
(expand_SPACESHIP): New function.
* tree-ssa-math-opts.c (optimize_spaceship): New function.
(math_opts_dom_walker::after_dom_children): Use it.
* config/i386/i386.md (spaceship<mode>3): New define_expand.
* config/i386/i386-protos.h (ix86_expand_fp_spaceship): Declare.
* config/i386/i386-expand.c (ix86_expand_fp_spaceship): New function.
* doc/md.texi (spaceship@var{m}3): Document.
* gcc.target/i386/pr103973-1.c: New test.
* gcc.target/i386/pr103973-2.c: New test.
* gcc.target/i386/pr103973-3.c: New test.
* gcc.target/i386/pr103973-4.c: New test.
* gcc.target/i386/pr103973-5.c: New test.
* gcc.target/i386/pr103973-6.c: New test.
* gcc.target/i386/pr103973-7.c: New test.
* gcc.target/i386/pr103973-8.c: New test.
* gcc.target/i386/pr103973-9.c: New test.
* gcc.target/i386/pr103973-10.c: New test.
* gcc.target/i386/pr103973-11.c: New test.
* gcc.target/i386/pr103973-12.c: New test.
* gcc.target/i386/pr103973-13.c: New test.
* gcc.target/i386/pr103973-14.c: New test.
* gcc.target/i386/pr103973-15.c: New test.
* gcc.target/i386/pr103973-16.c: New test.
* gcc.target/i386/pr103973-17.c: New test.
* gcc.target/i386/pr103973-18.c: New test.
* gcc.target/i386/pr103973-19.c: New test.
* gcc.target/i386/pr103973-20.c: New test.
* g++.target/i386/pr103973-1.C: New test.
* g++.target/i386/pr103973-2.C: New test.
* g++.target/i386/pr103973-3.C: New test.
* g++.target/i386/pr103973-4.C: New test.
* g++.target/i386/pr103973-5.C: New test.
* g++.target/i386/pr103973-6.C: New test.
* g++.target/i386/pr103973-7.C: New test.
* g++.target/i386/pr103973-8.C: New test.
* g++.target/i386/pr103973-9.C: New test.
* g++.target/i386/pr103973-10.C: New test.
* g++.target/i386/pr103973-11.C: New test.
* g++.target/i386/pr103973-12.C: New test.
* g++.target/i386/pr103973-13.C: New test.
* g++.target/i386/pr103973-14.C: New test.
* g++.target/i386/pr103973-15.C: New test.
* g++.target/i386/pr103973-16.C: New test.
* g++.target/i386/pr103973-17.C: New test.
* g++.target/i386/pr103973-18.C: New test.
* g++.target/i386/pr103973-19.C: New test.
* g++.target/i386/pr103973-20.C: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/i386-expand.c | 49 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 22 |
3 files changed, 72 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 138580d..5fae422 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -2879,6 +2879,55 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) emit_insn (gen_rtx_SET (dest, ret)); } +/* Expand floating point op0 <=> op1, i.e. + dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2. */ + +void +ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1) +{ + gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH); + rtx gt = ix86_expand_fp_compare (GT, op0, op1); + rtx l0 = gen_label_rtx (); + rtx l1 = gen_label_rtx (); + rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX; + rtx lend = gen_label_rtx (); + rtx tmp; + rtx_insn *jmp; + if (l2) + { + rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode, + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un, + gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ()); + } + rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode, + gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq, + gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + add_reg_br_prob_note (jmp, profile_probability::unlikely ()); + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt, + gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx); + jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); + add_reg_br_prob_note (jmp, profile_probability::even ()); + emit_move_insn (dest, constm1_rtx); + emit_jump (lend); + emit_label (l0); + emit_move_insn (dest, const0_rtx); + emit_jump (lend); + emit_label (l1); + emit_move_insn (dest, const1_rtx); + emit_jump (lend); + if (l2) + { + emit_label (l2); + emit_move_insn (dest, const2_rtx); + } + emit_label (lend); +} + /* Expand comparison setting or clearing carry flag. Return true when successful and set pop for the operation. */ static bool diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 6d14a40..a697dd2 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -150,6 +150,7 @@ extern bool ix86_expand_int_vec_cmp (rtx[]); extern bool ix86_expand_fp_vec_cmp (rtx[]); extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx); extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool); +extern void ix86_expand_fp_spaceship (rtx, rtx, rtx); extern bool ix86_expand_int_addcc (rtx[]); extern rtx_insn *ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool); extern bool ix86_call_use_plt_p (rtx); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 7b16943..a2f095e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -23886,6 +23886,28 @@ [(set_attr "type" "other") (set_attr "length" "4")]) +;; Spaceship optimization +(define_expand "spaceship<mode>3" + [(match_operand:SI 0 "register_operand") + (match_operand:MODEF 1 "cmp_fp_expander_operand") + (match_operand:MODEF 2 "cmp_fp_expander_operand")] + "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)) + && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))" +{ + ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]); + DONE; +}) + +(define_expand "spaceshipxf3" + [(match_operand:SI 0 "register_operand") + (match_operand:XF 1 "nonmemory_operand") + (match_operand:XF 2 "nonmemory_operand")] + "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))" +{ + ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]); + DONE; +}) + (include "mmx.md") (include "sse.md") (include "sync.md") |