aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2024-01-05 11:16:58 +0100
committerJakub Jelinek <jakub@redhat.com>2024-01-05 11:16:58 +0100
commit0152637c74c9eab7658483b1cca4e3d584dd4262 (patch)
tree682cf53b5033c7ec1e8ceaa36aec06f0f9870d81 /gcc
parent397bdd1ac59a556a9619c2b2bf203f88ff5e97c2 (diff)
downloadgcc-0152637c74c9eab7658483b1cca4e3d584dd4262.zip
gcc-0152637c74c9eab7658483b1cca4e3d584dd4262.tar.gz
gcc-0152637c74c9eab7658483b1cca4e3d584dd4262.tar.bz2
Improve __builtin_popcount* (x) == 1 generation if x is known != 0 [PR90693]
We expand __builtin_popcount* (x) == 1 as x ^ (x - 1) > x - 1, either unconditionally in tree-ssa-math-opts.cc if we don't have a direct optab support for popcount, or during expansion where we compare the costs of comparison of the popcount against one vs. the above expression. As mentioned in the PR, if we know from ranger that the argument is not zero, we can emit x & (x - 1) == 0 test which is same number of GIMPLE statements, but on many targets cheaper (e.g. whenever an AND instruction can also set flags on whether result was zero or not). The following patch does that. 2024-01-05 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/90693 * tree-ssa-math-opts.cc (match_single_bit_test): If tree_expr_nonzero_p (arg), remember it in the second argument to IFN_POPCOUNT or lower it as arg & (arg - 1) == 0 rather than arg ^ (arg - 1) > arg - 1. * internal-fn.cc (expand_POPCOUNT): If second argument to IFN_POPCOUNT suggests arg is non-zero, try to expand it as arg & (arg - 1) == 0 rather than arg ^ (arg - 1) > arg - 1. * gcc.target/i386/pr90693-2.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/internal-fn.cc15
-rw-r--r--gcc/testsuite/gcc.target/i386/pr90693-2.c33
-rw-r--r--gcc/tree-ssa-math-opts.cc20
3 files changed, 60 insertions, 8 deletions
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 80efd2f..a07f25f 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -5118,10 +5118,13 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
/* If .POPCOUNT call has 2 arguments, match_single_bit_test marked it
because the result is only used in an equality comparison against 1.
Use rtx costs in that case to determine if .POPCOUNT (arg) == 1
- or (arg ^ (arg - 1)) > arg - 1 is cheaper. */
+ or (arg ^ (arg - 1)) > arg - 1 is cheaper.
+ If .POPCOUNT second argument is 0, we additionally know that arg
+ is non-zero, so use arg & (arg - 1) == 0 instead. */
bool speed_p = optimize_insn_for_speed_p ();
tree lhs = gimple_call_lhs (stmt);
tree arg = gimple_call_arg (stmt, 0);
+ bool nonzero_arg = integer_zerop (gimple_call_arg (stmt, 1));
tree type = TREE_TYPE (arg);
machine_mode mode = TYPE_MODE (type);
do_pending_stack_adjust ();
@@ -5147,11 +5150,15 @@ expand_POPCOUNT (internal_fn fn, gcall *stmt)
1, OPTAB_DIRECT);
if (argm1 == NULL_RTX)
goto fail;
- rtx argxorargm1 = expand_simple_binop (mode, XOR, op0, argm1, NULL_RTX,
- 1, OPTAB_DIRECT);
+ rtx argxorargm1 = expand_simple_binop (mode, nonzero_arg ? AND : XOR, op0,
+ argm1, NULL_RTX, 1, OPTAB_DIRECT);
if (argxorargm1 == NULL_RTX)
goto fail;
- rtx cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
+ rtx cmp;
+ if (nonzero_arg)
+ cmp = emit_store_flag (NULL_RTX, EQ, argxorargm1, const0_rtx, mode, 1, 1);
+ else
+ cmp = emit_store_flag (NULL_RTX, GTU, argxorargm1, argm1, mode, 1, 1);
if (cmp == NULL_RTX)
goto fail;
rtx_insn *cmp_insns = get_insns ();
diff --git a/gcc/testsuite/gcc.target/i386/pr90693-2.c b/gcc/testsuite/gcc.target/i386/pr90693-2.c
new file mode 100644
index 0000000..3cfeccb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr90693-2.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/90693 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-abm -mno-popcnt -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "POPCOUNT \\\(" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_popcount(ll)? \\\(" "optimized" } } */
+
+int
+foo (unsigned int x)
+{
+ if (x == 0)
+ __builtin_unreachable ();
+ return __builtin_popcount (x) == 1;
+}
+
+int
+bar (unsigned int x)
+{
+ return (x & (x - 1)) == 0;
+}
+
+int
+baz (unsigned long long x)
+{
+ if (x == 0)
+ __builtin_unreachable ();
+ return __builtin_popcountll (x) == 1;
+}
+
+int
+qux (unsigned long long x)
+{
+ return (x & (x - 1)) == 0;
+}
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 05d3b92..2db26e4 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -5194,12 +5194,14 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
tree type = TREE_TYPE (arg);
if (!INTEGRAL_TYPE_P (type))
return;
+ bool nonzero_arg = tree_expr_nonzero_p (arg);
if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
{
/* Tell expand_POPCOUNT the popcount result is only used in equality
comparison with one, so that it can decide based on rtx costs. */
gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
- integer_one_node);
+ nonzero_arg ? integer_zero_node
+ : integer_one_node);
gimple_call_set_lhs (g, gimple_call_lhs (call));
gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
gsi_replace (&gsi2, g, true);
@@ -5209,19 +5211,29 @@ match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
build_int_cst (type, -1));
gsi_insert_before (gsi, g, GSI_SAME_STMT);
- g = gimple_build_assign (make_ssa_name (type), BIT_XOR_EXPR, arg, argm1);
+ g = gimple_build_assign (make_ssa_name (type),
+ nonzero_arg ? BIT_AND_EXPR : BIT_XOR_EXPR,
+ arg, argm1);
gsi_insert_before (gsi, g, GSI_SAME_STMT);
+ tree_code cmpcode;
+ if (nonzero_arg)
+ {
+ argm1 = build_zero_cst (type);
+ cmpcode = code;
+ }
+ else
+ cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
if (gcond *cond = dyn_cast <gcond *> (stmt))
{
gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
gimple_cond_set_rhs (cond, argm1);
- gimple_cond_set_code (cond, code == EQ_EXPR ? GT_EXPR : LE_EXPR);
+ gimple_cond_set_code (cond, cmpcode);
}
else
{
gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
gimple_assign_set_rhs2 (stmt, argm1);
- gimple_assign_set_rhs_code (stmt, code == EQ_EXPR ? GT_EXPR : LE_EXPR);
+ gimple_assign_set_rhs_code (stmt, cmpcode);
}
update_stmt (stmt);
gimple_stmt_iterator gsi2 = gsi_for_stmt (call);