aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2022-01-17 13:39:05 +0100
committer Jakub Jelinek <jakub@redhat.com> 2022-01-17 13:39:05 +0100
commit 463d9108766dcbb6a1051985e6c840a46897fe10 (patch)
tree 5e83cb1ed249317106bf8059c1eb133fdf0cb84f /gcc/config
parent 4152e4ad3f3a67ce30f5e0e01d5eba03fcff10b8 (diff)
downloadgcc-463d9108766dcbb6a1051985e6c840a46897fe10.zip
gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.gz
gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.bz2
widening_mul, i386: Improve spaceship expansion on x86 [PR103973]
C++20: #include <compare> auto cmp4way(double a, double b) { return a <=> b; } expands to: ucomisd %xmm1, %xmm0 jp .L8 movl $0, %eax jne .L8 .L2: ret .p2align 4,,10 .p2align 3 .L8: comisd %xmm0, %xmm1 movl $-1, %eax ja .L2 ucomisd %xmm1, %xmm0 setbe %al addl $1, %eax ret That is 3 comparisons of the same operands. The following patch improves it to just one comparison: comisd %xmm1, %xmm0 jp .L4 seta %al movl $0, %edx leal -1(%rax,%rax), %eax cmove %edx, %eax ret .L4: movl $2, %eax ret While a <=> b expands to a == b ? 0 : a < b ? -1 : a > b ? 1 : 2 where the first comparison is equality and this shouldn't raise exceptions on qNaN operands, if the operands aren't equal (which includes unordered cases), then it immediately performs < or > comparison and that raises exceptions even on qNaNs, so we can just perform a single comparison that raises exceptions on qNaN. As the 4 different cases are encoded as (ZF CF PF): 1 1 1 for a unordered b, 0 0 0 for a > b, 0 1 0 for a < b, 1 0 0 for a == b, we can emit an optimal sequence of comparisons, first jp for the unordered case, then je for the == case and finally jb for the < case. The patch pattern recognizes spaceship-like comparisons during widening_mul if the spaceship optab is implemented, and replaces those comparisons with comparisons of .SPACESHIP ifn which returns -1/0/1/2 based on the comparison. This seems to work well both for the case of just returning the -1/0/1/2 (when we have just a common successor with a PHI) or when the different cases are handled with various other basic blocks. The testcases cover both of those cases, the latter with different function calls in those. 2022-01-17 Jakub Jelinek <jakub@redhat.com> PR target/103973 * tree-cfg.h (cond_only_block_p): Declare. * tree-ssa-phiopt.c (cond_only_block_p): Move function to ... * tree-cfg.c (cond_only_block_p): ... here. No longer static. * optabs.def (spaceship_optab): New optab. * internal-fn.def (SPACESHIP): New internal function. 
* internal-fn.h (expand_SPACESHIP): Declare. * internal-fn.c (expand_PHI): Formatting fix. (expand_SPACESHIP): New function. * tree-ssa-math-opts.c (optimize_spaceship): New function. (math_opts_dom_walker::after_dom_children): Use it. * config/i386/i386.md (spaceship<mode>3): New define_expand. * config/i386/i386-protos.h (ix86_expand_fp_spaceship): Declare. * config/i386/i386-expand.c (ix86_expand_fp_spaceship): New function. * doc/md.texi (spaceship@var{m}3): Document. * gcc.target/i386/pr103973-1.c: New test. * gcc.target/i386/pr103973-2.c: New test. * gcc.target/i386/pr103973-3.c: New test. * gcc.target/i386/pr103973-4.c: New test. * gcc.target/i386/pr103973-5.c: New test. * gcc.target/i386/pr103973-6.c: New test. * gcc.target/i386/pr103973-7.c: New test. * gcc.target/i386/pr103973-8.c: New test. * gcc.target/i386/pr103973-9.c: New test. * gcc.target/i386/pr103973-10.c: New test. * gcc.target/i386/pr103973-11.c: New test. * gcc.target/i386/pr103973-12.c: New test. * gcc.target/i386/pr103973-13.c: New test. * gcc.target/i386/pr103973-14.c: New test. * gcc.target/i386/pr103973-15.c: New test. * gcc.target/i386/pr103973-16.c: New test. * gcc.target/i386/pr103973-17.c: New test. * gcc.target/i386/pr103973-18.c: New test. * gcc.target/i386/pr103973-19.c: New test. * gcc.target/i386/pr103973-20.c: New test. * g++.target/i386/pr103973-1.C: New test. * g++.target/i386/pr103973-2.C: New test. * g++.target/i386/pr103973-3.C: New test. * g++.target/i386/pr103973-4.C: New test. * g++.target/i386/pr103973-5.C: New test. * g++.target/i386/pr103973-6.C: New test. * g++.target/i386/pr103973-7.C: New test. * g++.target/i386/pr103973-8.C: New test. * g++.target/i386/pr103973-9.C: New test. * g++.target/i386/pr103973-10.C: New test. * g++.target/i386/pr103973-11.C: New test. * g++.target/i386/pr103973-12.C: New test. * g++.target/i386/pr103973-13.C: New test. * g++.target/i386/pr103973-14.C: New test. * g++.target/i386/pr103973-15.C: New test. 
* g++.target/i386/pr103973-16.C: New test. * g++.target/i386/pr103973-17.C: New test. * g++.target/i386/pr103973-18.C: New test. * g++.target/i386/pr103973-19.C: New test. * g++.target/i386/pr103973-20.C: New test.
Diffstat (limited to 'gcc/config')
-rw-r--r-- gcc/config/i386/i386-expand.c 49
-rw-r--r-- gcc/config/i386/i386-protos.h 1
-rw-r--r-- gcc/config/i386/i386.md 22
3 files changed, 72 insertions, 0 deletions
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 138580d..5fae422 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -2879,6 +2879,55 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
emit_insn (gen_rtx_SET (dest, ret));
}
+/* Expand floating point op0 <=> op1, i.e.
+ dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2.
+
+ DEST receives the result, OP0 and OP1 are the values compared.
+ The GT condition is obtained once from ix86_expand_fp_compare; the
+ other two conditions are built directly on the flags register in
+ CCFPmode. The emitted control flow is:
+ - if TARGET_IEEE_FP, an UNORDERED jump to the block storing 2;
+ - an UNEQ jump to the block storing 0 (UNEQ is unordered-or-equal
+ in RTL, so once the unordered case has been branched away it
+ selects plain equality);
+ - a GT jump to the block storing 1;
+ - otherwise fall through and store -1.
+ Without TARGET_IEEE_FP no unordered test is emitted and 2 is never
+ stored.
+ NOTE(review): the assert below presumably holds because the
+ spaceship expanders are only enabled when the flags can be tested
+ directly -- confirm against ix86_fp_comparison_strategy. */
+
+void
+ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
+{
+ gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
+ rtx gt = ix86_expand_fp_compare (GT, op0, op1); /* Condition for the op0 > op1 jump. */
+ rtx l0 = gen_label_rtx (); /* Block storing 0. */
+ rtx l1 = gen_label_rtx (); /* Block storing 1. */
+ rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX; /* Block storing 2, if any. */
+ rtx lend = gen_label_rtx (); /* Common exit. */
+ rtx tmp;
+ rtx_insn *jmp;
+ if (l2)
+ {
+ rtx un = gen_rtx_fmt_ee (UNORDERED, VOIDmode,
+ gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, un,
+ gen_rtx_LABEL_REF (VOIDmode, l2), pc_rtx);
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability:: very_unlikely ());
+ }
+ rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
+ gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
+ gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability::unlikely ());
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
+ gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability::even ());
+ emit_move_insn (dest, constm1_rtx); /* Fall through: none of the jumps was taken. */
+ emit_jump (lend);
+ emit_label (l0); /* UNEQ jump target. */
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (lend);
+ emit_label (l1); /* GT jump target. */
+ emit_move_insn (dest, const1_rtx);
+ emit_jump (lend);
+ if (l2)
+ {
+ emit_label (l2); /* UNORDERED jump target. */
+ emit_move_insn (dest, const2_rtx);
+ }
+ emit_label (lend);
+}
+
/* Expand comparison setting or clearing carry flag. Return true when
successful and set pop for the operation. */
static bool
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 6d14a40..a697dd2 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -150,6 +150,7 @@ extern bool ix86_expand_int_vec_cmp (rtx[]);
extern bool ix86_expand_fp_vec_cmp (rtx[]);
extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
+extern void ix86_expand_fp_spaceship (rtx, rtx, rtx);
extern bool ix86_expand_int_addcc (rtx[]);
extern rtx_insn *ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
extern bool ix86_call_use_plt_p (rtx);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7b16943..a2f095e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23886,6 +23886,28 @@
[(set_attr "type" "other")
(set_attr "length" "4")])
+;; Spaceship optimization: expand operand 1 <=> operand 2 for the modes of
+;; the MODEF iterator, storing the SImode result in register operand 0.
+;; All of the expansion is done by ix86_expand_fp_spaceship.
+;; NOTE(review): the TARGET_CMOVE / TARGET_SAHF half of the condition
+;; presumably matches the gcc_checking_assert at the top of
+;; ix86_expand_fp_spaceship -- confirm.
+(define_expand "spaceship<mode>3"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:MODEF 1 "cmp_fp_expander_operand")
+ (match_operand:MODEF 2 "cmp_fp_expander_operand")]
+ "(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+ && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
+{
+ ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+;; XFmode counterpart of spaceship<mode>3: requires TARGET_80387, accepts
+;; nonmemory operands and stores the SImode result in register operand 0
+;; via ix86_expand_fp_spaceship.
+(define_expand "spaceshipxf3"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:XF 1 "nonmemory_operand")
+ (match_operand:XF 2 "nonmemory_operand")]
+ "TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
+{
+ ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")