aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndrew Pinski <quic_apinski@quicinc.com>2024-08-29 12:10:44 -0700
committerAndrew Pinski <quic_apinski@quicinc.com>2024-09-09 07:38:55 -0700
commit75a4143d69a842d691c14a477b0ba277d8708fc7 (patch)
tree51b69cdadd5f91ff29d089a700cc9340f76ca6ae /gcc
parent8f3b402b6fca3e4b018e99f65bf22f478e888c16 (diff)
downloadgcc-75a4143d69a842d691c14a477b0ba277d8708fc7.zip
gcc-75a4143d69a842d691c14a477b0ba277d8708fc7.tar.gz
gcc-75a4143d69a842d691c14a477b0ba277d8708fc7.tar.bz2
middle-end: also optimized `popcount(a) <= 1` [PR90693]
This expands on optimizing `popcount(a) == 1` to also handle `popcount(a) <= 1`. `<= 1` can be expanded as `(a & -a) == 0` like what is done for `== 1` if we know that a was nonzero. We have to do the optimization in 2 places due to if we have an optab entry for popcount or not. Built and tested for aarch64-linux-gnu. PR middle-end/90693 gcc/ChangeLog: * internal-fn.cc (expand_POPCOUNT): Handle the second argument being `-1` for `<= 1`. * tree-ssa-math-opts.cc (match_single_bit_test): Handle LE/GT cases. (math_opts_dom_walker::after_dom_children): Call match_single_bit_test for LE_EXPR/GT_EXPR also. gcc/testsuite/ChangeLog: * gcc.target/aarch64/popcnt-le-1.c: New test. * gcc.target/aarch64/popcnt-le-2.c: New test. * gcc.target/aarch64/popcnt-le-3.c: New test. Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
Diffstat (limited to 'gcc')
-rw-r--r--gcc/internal-fn.cc20
-rw-r--r--gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c29
-rw-r--r--gcc/testsuite/gcc.target/aarch64/popcnt-le-2.c31
-rw-r--r--gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c31
-rw-r--r--gcc/tree-ssa-math-opts.cc25
5 files changed, 129 insertions, 7 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 4e33db3..b55f089 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5304,11 +5304,16 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
or (arg ^ (arg - 1)) > arg - 1 is cheaper.
If .POPCOUNT second argument is 0, we additionally know that arg
- is non-zero, so use arg & (arg - 1) == 0 instead. */
+ is non-zero, so use arg & (arg - 1) == 0 instead.
+ If .POPCOUNT second argument is -1, the comparison was either `<= 1`
+ or `> 1`. */
bool speed_p = optimize_insn_for_speed_p ();
tree lhs = gimple_call_lhs (stmt);
tree arg = gimple_call_arg (stmt, 0);
bool nonzero_arg = integer_zerop (gimple_call_arg (stmt, 1));
+ bool was_le = integer_minus_onep (gimple_call_arg (stmt, 1));
+ if (was_le)
+ nonzero_arg = true;
tree type = TREE_TYPE (arg);
machine_mode mode = TYPE_MODE (type);
machine_mode lhsmode = TYPE_MODE (TREE_TYPE (lhs));
@@ -5360,10 +5365,23 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
emit_insn (popcount_insns);
else
{
+ start_sequence ();
emit_insn (cmp_insns);
plhs = expand_normal (lhs);
if (GET_MODE (cmp) != GET_MODE (plhs))
cmp = convert_to_mode (GET_MODE (plhs), cmp, 1);
+ /* For `<= 1`, we need to produce `2 - cmp` or `cmp ? 1 : 2` as that
+ then gets compared against 1 and we need the false case to be 2. */
+ if (was_le)
+ {
+ cmp = expand_simple_binop (GET_MODE (cmp), MINUS, const2_rtx,
+ cmp, NULL_RTX, 1, OPTAB_WIDEN);
+ if (!cmp)
+ goto fail;
+ }
emit_move_insn (plhs, cmp);
+ rtx_insn *all_insns = get_insns ();
+ end_sequence ();
+ emit_insn (all_insns);
}
}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
new file mode 100644
index 0000000..b4141da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-1.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR middle-end/90693 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** le32:
+** sub w([0-9]+), w0, #1
+** tst w0, w\1
+** cset w0, eq
+** ret
+*/
+
+unsigned le32 (const unsigned int a) {
+ return __builtin_popcountg (a) <= 1;
+}
+
+/*
+** gt32:
+** sub w([0-9]+), w0, #1
+** tst w0, w\1
+** cset w0, ne
+** ret
+*/
+unsigned gt32 (const unsigned int a) {
+ return __builtin_popcountg (a) > 1;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-2.c b/gcc/testsuite/gcc.target/aarch64/popcnt-le-2.c
new file mode 100644
index 0000000..975552c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-2.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mgeneral-regs-only -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "POPCOUNT \\\(" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_popcount \\\(" "optimized" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR middle-end/90693 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** le32:
+** sub w([0-9]+), w0, #1
+** tst w\1, w0
+** cset w0, eq
+** ret
+*/
+
+unsigned le32 (const unsigned int a) {
+ return __builtin_popcountg (a) <= 1;
+}
+
+/*
+** gt32:
+** sub w([0-9]+), w0, #1
+** tst w\1, w0
+** cset w0, ne
+** ret
+*/
+unsigned gt32 (const unsigned int a) {
+ return __builtin_popcountg (a) > 1;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
new file mode 100644
index 0000000..b811e6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/popcnt-le-3.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-details" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+/* PR middle-end/90693 */
+
+#pragma GCC target "+nocssc"
+
+/*
+** le16:
+** sub w([0-9]+), w0, #1
+** and w([0-9]+), w0, w\1
+** tst w\2, 65535
+** cset w0, eq
+** ret
+*/
+
+unsigned le16 (const unsigned short a) {
+ return __builtin_popcountg (a) <= 1;
+}
+
+/*
+** gt16:
+** sub w([0-9]+), w0, #1
+** and w([0-9]+), w0, w\1
+** tst w\2, 65535
+** cset w0, ne
+** ret
+*/
+unsigned gt16 (const unsigned short a) {
+ return __builtin_popcountg (a) > 1;
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 3c93fca..d61668a 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -5433,13 +5433,15 @@ match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
(x & (x - 1)) > x - 1 or (x & (x - 1)) <= x - 1 if .POPCOUNT
- isn't a direct optab. */
+ isn't a direct optab. Also handle `<=`/`>` to be
+ `x & (x - 1) !=/== x`. */
static void
match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
{
tree clhs, crhs;
enum tree_code code;
+ bool was_le = false;
if (gimple_code (stmt) == GIMPLE_COND)
{
clhs = gimple_cond_lhs (stmt);
@@ -5452,8 +5454,11 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
crhs = gimple_assign_rhs2 (stmt);
code = gimple_assign_rhs_code (stmt);
}
- if (code != EQ_EXPR && code != NE_EXPR)
+ if (code != LE_EXPR && code != GT_EXPR
+ && code != EQ_EXPR && code != NE_EXPR)
return;
+ if (code == LE_EXPR || code == GT_EXPR)
+ was_le = true;
if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
return;
gimple *call = SSA_NAME_DEF_STMT (clhs);
@@ -5477,8 +5482,9 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
/* Tell expand_POPCOUNT the popcount result is only used in equality
comparison with one, so that it can decide based on rtx costs. */
gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
- nonzero_arg ? integer_zero_node
- : integer_one_node);
+ was_le ? integer_minus_one_node
+ : (nonzero_arg ? integer_zero_node
+ : integer_one_node));
gimple_call_set_lhs (g, gimple_call_lhs (call));
gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
gsi_replace (&gsi2, g, true);
@@ -5489,11 +5495,16 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
build_int_cst (type, -1));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
g = gimple_build_assign (make_ssa_name (type),
- nonzero_arg ? BIT_AND_EXPR : BIT_XOR_EXPR,
+ (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
arg, argm1);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
tree_code cmpcode;
- if (nonzero_arg)
+ if (was_le)
+ {
+ argm1 = build_zero_cst (type);
+ cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
+ }
+ else if (nonzero_arg)
{
argm1 = build_zero_cst (type);
cmpcode = code;
@@ -6188,6 +6199,8 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
case EQ_EXPR:
case NE_EXPR:
+ case LE_EXPR:
+ case GT_EXPR:
match_single_bit_test (&gsi, stmt);
break;