diff options
author | Jakub Jelinek <jakub@redhat.com> | 2022-01-17 13:39:05 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2022-01-17 13:39:05 +0100 |
commit | 463d9108766dcbb6a1051985e6c840a46897fe10 (patch) | |
tree | 5e83cb1ed249317106bf8059c1eb133fdf0cb84f /gcc/tree-ssa-math-opts.c | |
parent | 4152e4ad3f3a67ce30f5e0e01d5eba03fcff10b8 (diff) | |
download | gcc-463d9108766dcbb6a1051985e6c840a46897fe10.zip gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.gz gcc-463d9108766dcbb6a1051985e6c840a46897fe10.tar.bz2 |
widening_mul, i386: Improve spaceship expansion on x86 [PR103973]
C++20:
#include <compare>
auto cmp4way(double a, double b)
{
return a <=> b;
}
expands to:
ucomisd %xmm1, %xmm0
jp .L8
movl $0, %eax
jne .L8
.L2:
ret
.p2align 4,,10
.p2align 3
.L8:
comisd %xmm0, %xmm1
movl $-1, %eax
ja .L2
ucomisd %xmm1, %xmm0
setbe %al
addl $1, %eax
ret
That is 3 comparisons of the same operands.
The following patch improves it to just one comparison:
comisd %xmm1, %xmm0
jp .L4
seta %al
movl $0, %edx
leal -1(%rax,%rax), %eax
cmove %edx, %eax
ret
.L4:
movl $2, %eax
ret
While a <=> b expands to a == b ? 0 : a < b ? -1 : a > b ? 1 : 2
where the first comparison is equality and this shouldn't raise
exceptions on qNaN operands, if the operands aren't equal (which
includes unordered cases), then it immediately performs < or >
comparison and that raises exceptions even on qNaNs, so we can just
perform a single comparison that raises exceptions on qNaN.
As the 4 different cases are encoded as
ZF CF PF
1 1 1 a unordered b
0 0 0 a > b
0 1 0 a < b
1 0 0 a == b
we can emit optimal sequence of comparions, first jp
for the unordered case, then je for the == case and finally jb
for the < case.
The patch pattern recognizes spaceship-like comparisons during
widening_mul if the spaceship optab is implemented, and replaces
those comparisons with comparisons of .SPACESHIP ifn which returns
-1/0/1/2 based on the comparison. This seems to work well both for the
case of just returning the -1/0/1/2 (when we have just a common
successor with a PHI) or when the different cases are handled with
various other basic blocks. The testcases cover both of those cases,
the latter with different function calls in those.
2022-01-17 Jakub Jelinek <jakub@redhat.com>
PR target/103973
* tree-cfg.h (cond_only_block_p): Declare.
* tree-ssa-phiopt.c (cond_only_block_p): Move function to ...
* tree-cfg.c (cond_only_block_p): ... here. No longer static.
* optabs.def (spaceship_optab): New optab.
* internal-fn.def (SPACESHIP): New internal function.
* internal-fn.h (expand_SPACESHIP): Declare.
* internal-fn.c (expand_PHI): Formatting fix.
(expand_SPACESHIP): New function.
* tree-ssa-math-opts.c (optimize_spaceship): New function.
(math_opts_dom_walker::after_dom_children): Use it.
* config/i386/i386.md (spaceship<mode>3): New define_expand.
* config/i386/i386-protos.h (ix86_expand_fp_spaceship): Declare.
* config/i386/i386-expand.c (ix86_expand_fp_spaceship): New function.
* doc/md.texi (spaceship@var{m}3): Document.
* gcc.target/i386/pr103973-1.c: New test.
* gcc.target/i386/pr103973-2.c: New test.
* gcc.target/i386/pr103973-3.c: New test.
* gcc.target/i386/pr103973-4.c: New test.
* gcc.target/i386/pr103973-5.c: New test.
* gcc.target/i386/pr103973-6.c: New test.
* gcc.target/i386/pr103973-7.c: New test.
* gcc.target/i386/pr103973-8.c: New test.
* gcc.target/i386/pr103973-9.c: New test.
* gcc.target/i386/pr103973-10.c: New test.
* gcc.target/i386/pr103973-11.c: New test.
* gcc.target/i386/pr103973-12.c: New test.
* gcc.target/i386/pr103973-13.c: New test.
* gcc.target/i386/pr103973-14.c: New test.
* gcc.target/i386/pr103973-15.c: New test.
* gcc.target/i386/pr103973-16.c: New test.
* gcc.target/i386/pr103973-17.c: New test.
* gcc.target/i386/pr103973-18.c: New test.
* gcc.target/i386/pr103973-19.c: New test.
* gcc.target/i386/pr103973-20.c: New test.
* g++.target/i386/pr103973-1.C: New test.
* g++.target/i386/pr103973-2.C: New test.
* g++.target/i386/pr103973-3.C: New test.
* g++.target/i386/pr103973-4.C: New test.
* g++.target/i386/pr103973-5.C: New test.
* g++.target/i386/pr103973-6.C: New test.
* g++.target/i386/pr103973-7.C: New test.
* g++.target/i386/pr103973-8.C: New test.
* g++.target/i386/pr103973-9.C: New test.
* g++.target/i386/pr103973-10.C: New test.
* g++.target/i386/pr103973-11.C: New test.
* g++.target/i386/pr103973-12.C: New test.
* g++.target/i386/pr103973-13.C: New test.
* g++.target/i386/pr103973-14.C: New test.
* g++.target/i386/pr103973-15.C: New test.
* g++.target/i386/pr103973-16.C: New test.
* g++.target/i386/pr103973-17.C: New test.
* g++.target/i386/pr103973-18.C: New test.
* g++.target/i386/pr103973-19.C: New test.
* g++.target/i386/pr103973-20.C: New test.
Diffstat (limited to 'gcc/tree-ssa-math-opts.c')
-rw-r--r-- | gcc/tree-ssa-math-opts.c | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 1b6a57b..e3c3bd8 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -4637,6 +4637,227 @@ convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi) return true; } +/* If target has spaceship<MODE>3 expander, pattern recognize + <bb 2> [local count: 1073741824]: + if (a_2(D) == b_3(D)) + goto <bb 6>; [34.00%] + else + goto <bb 3>; [66.00%] + + <bb 3> [local count: 708669601]: + if (a_2(D) < b_3(D)) + goto <bb 6>; [1.04%] + else + goto <bb 4>; [98.96%] + + <bb 4> [local count: 701299439]: + if (a_2(D) > b_3(D)) + goto <bb 5>; [48.89%] + else + goto <bb 6>; [51.11%] + + <bb 5> [local count: 342865295]: + + <bb 6> [local count: 1073741824]: + and turn it into: + <bb 2> [local count: 1073741824]: + _1 = .SPACESHIP (a_2(D), b_3(D)); + if (_1 == 0) + goto <bb 6>; [34.00%] + else + goto <bb 3>; [66.00%] + + <bb 3> [local count: 708669601]: + if (_1 == -1) + goto <bb 6>; [1.04%] + else + goto <bb 4>; [98.96%] + + <bb 4> [local count: 701299439]: + if (_1 == 1) + goto <bb 5>; [48.89%] + else + goto <bb 6>; [51.11%] + + <bb 5> [local count: 342865295]: + + <bb 6> [local count: 1073741824]: + so that the backend can emit optimal comparison and + conditional jump sequence. */ + +static void +optimize_spaceship (gimple *stmt) +{ + enum tree_code code = gimple_cond_code (stmt); + if (code != EQ_EXPR && code != NE_EXPR) + return; + tree arg1 = gimple_cond_lhs (stmt); + tree arg2 = gimple_cond_rhs (stmt); + if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) + || optab_handler (spaceship_optab, + TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing + || operand_equal_p (arg1, arg2, 0)) + return; + + basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL; + edge em1 = NULL, e1 = NULL, e2 = NULL; + bb1 = EDGE_SUCC (bb0, 1)->dest; + if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR)) + bb1 = EDGE_SUCC (bb0, 0)->dest; + + gimple *g = last_stmt (bb1); + if (g == NULL + || gimple_code (g) != GIMPLE_COND + || !single_pred_p (bb1) + || (operand_equal_p (gimple_cond_lhs (g), arg1, 0) + ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0) + : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0) + || !operand_equal_p (gimple_cond_rhs (g), arg1, 0))) + || !cond_only_block_p (bb1)) + return; + + enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) + ? LT_EXPR : GT_EXPR); + switch (gimple_cond_code (g)) + { + case LT_EXPR: + case LE_EXPR: + break; + case GT_EXPR: + case GE_EXPR: + ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR; + break; + default: + return; + } + + for (int i = 0; i < 2; ++i) + { + /* With NaNs, </<=/>/>= are false, so we need to look for the + third comparison on the false edge from whatever non-equality + comparison the second comparison is. */ + if (HONOR_NANS (TREE_TYPE (arg1)) + && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0) + continue; + + bb2 = EDGE_SUCC (bb1, i)->dest; + g = last_stmt (bb2); + if (g == NULL + || gimple_code (g) != GIMPLE_COND + || !single_pred_p (bb2) + || (operand_equal_p (gimple_cond_lhs (g), arg1, 0) + ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0) + : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0) + || !operand_equal_p (gimple_cond_rhs (g), arg1, 0))) + || !cond_only_block_p (bb2) + || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest) + continue; + + enum tree_code ccode2 + = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR); + switch (gimple_cond_code (g)) + { + case LT_EXPR: + case LE_EXPR: + break; + case GT_EXPR: + case GE_EXPR: + ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR; + break; + default: + continue; + } + if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2) + continue; + + if ((ccode == LT_EXPR) + ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)) + { + em1 = EDGE_SUCC (bb1, 1 - i); + e1 = EDGE_SUCC (bb2, 0); + e2 = EDGE_SUCC (bb2, 1); + if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0)) + std::swap (e1, e2); + } + else + { + e1 = EDGE_SUCC (bb1, 1 - i); + em1 = EDGE_SUCC (bb2, 0); + e2 = EDGE_SUCC (bb2, 1); + if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0)) + std::swap (em1, e2); + } + break; + } + + if (em1 == NULL) + { + if ((ccode == LT_EXPR) + ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0)) + { + em1 = EDGE_SUCC (bb1, 1); + e1 = EDGE_SUCC (bb1, 0); + e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; + } + else + { + em1 = EDGE_SUCC (bb1, 0); + e1 = EDGE_SUCC (bb1, 1); + e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1; + } + } + + g = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2); + tree lhs = make_ssa_name (integer_type_node); + gimple_call_set_lhs (g, lhs); + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + + gcond *cond = as_a <gcond *> (stmt); + gimple_cond_set_lhs (cond, lhs); + gimple_cond_set_rhs (cond, integer_zero_node); + update_stmt (stmt); + + g = last_stmt (bb1); + cond = as_a <gcond *> (g); + gimple_cond_set_lhs (cond, lhs); + if (em1->src == bb1 && e2 != em1) + { + gimple_cond_set_rhs (cond, integer_minus_one_node); + gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE) + ? EQ_EXPR : NE_EXPR); + } + else + { + gcc_assert (e1->src == bb1 && e2 != e1); + gimple_cond_set_rhs (cond, integer_one_node); + gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE) + ? EQ_EXPR : NE_EXPR); + } + update_stmt (g); + + if (e2 != e1 && e2 != em1) + { + g = last_stmt (bb2); + cond = as_a <gcond *> (g); + gimple_cond_set_lhs (cond, lhs); + if (em1->src == bb2) + gimple_cond_set_rhs (cond, integer_minus_one_node); + else + { + gcc_assert (e1->src == bb2); + gimple_cond_set_rhs (cond, integer_one_node); + } + gimple_cond_set_code (cond, + (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR); + update_stmt (g); + } + + wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node)); + wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node)); + set_range_info (lhs, VR_RANGE, wm1, w2); +} + /* Find integer multiplications where the operands are extended from smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR @@ -4798,6 +5019,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb) break; } } + else if (gimple_code (stmt) == GIMPLE_COND) + optimize_spaceship (stmt); gsi_next (&gsi); } if (fma_state.m_deferring_p |