aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2021-01-04 14:36:06 +0100
committerJakub Jelinek <jakub@redhat.com>2021-01-04 14:36:06 +0100
commit24cd9afe617a39801d190418cf3fbab3bc3742a7 (patch)
treebf580b45d997d0ebbe977b7a323cb5e64c945cb2
parent39bd65faee3bafe2dc067e5fedb5079896551a8a (diff)
downloadgcc-24cd9afe617a39801d190418cf3fbab3bc3742a7.zip
gcc-24cd9afe617a39801d190418cf3fbab3bc3742a7.tar.gz
gcc-24cd9afe617a39801d190418cf3fbab3bc3742a7.tar.bz2
loop-niter: Recognize popcount idioms even with char, short and __int128 [PR95771]
As the testcase shows, we punt unnecessarily on popcount loop idioms if the type is smaller than int or larger than long long. Smaller type than int can be handled by zero-extending the argument to unsigned int, and types twice as long as long long by doing __builtin_popcountll on both halves of the __int128. 2020-01-04 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/95771 * tree-ssa-loop-niter.c (number_of_iterations_popcount): Handle types with precision smaller than int's precision and types with precision twice as large as long long. Formatting fixes. * gcc.target/i386/pr95771.c: New test.
-rw-r--r--gcc/testsuite/gcc.target/i386/pr95771.c67
-rw-r--r--gcc/tree-ssa-loop-niter.c45
2 files changed, 98 insertions, 14 deletions
diff --git a/gcc/testsuite/gcc.target/i386/pr95771.c b/gcc/testsuite/gcc.target/i386/pr95771.c
new file mode 100644
index 0000000..d7b67017
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95771.c
@@ -0,0 +1,67 @@
+/* PR tree-optimization/95771 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mpopcnt -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 6 "optimized" { target int128 } } } */
+/* { dg-final { scan-tree-dump-times " = __builtin_popcount" 4 "optimized" { target { ! int128 } } } } */
+
+int
+foo (unsigned char x)
+{
+ int i = 0;
+ while (x)
+ {
+ x &= x - 1;
+ ++i;
+ }
+ return i;
+}
+
+int
+bar (unsigned short x)
+{
+ int i = 0;
+ while (x)
+ {
+ x &= x - 1;
+ ++i;
+ }
+ return i;
+}
+
+int
+baz (unsigned int x)
+{
+ int i = 0;
+ while (x)
+ {
+ x &= x - 1;
+ ++i;
+ }
+ return i;
+}
+
+int
+qux (unsigned long long x)
+{
+ int i = 0;
+ while (x)
+ {
+ x &= x - 1;
+ ++i;
+ }
+ return i;
+}
+
+#ifdef __SIZEOF_INT128__
+int
+corge (unsigned __int128 x)
+{
+ int i = 0;
+ while (x)
+ {
+ x &= x - 1;
+ ++i;
+ }
+ return i;
+}
+#endif
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 055aec5..98978bc 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2666,27 +2666,45 @@ number_of_iterations_popcount (loop_p loop, edge exit,
/* We found a match. Get the corresponding popcount builtin. */
tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
- if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION (integer_type_node))
+ if (TYPE_PRECISION (TREE_TYPE (src)) <= TYPE_PRECISION (integer_type_node))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
- else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
- (long_integer_type_node))
+ else if (TYPE_PRECISION (TREE_TYPE (src))
+ == TYPE_PRECISION (long_integer_type_node))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
- else if (TYPE_PRECISION (TREE_TYPE (src)) == TYPE_PRECISION
- (long_long_integer_type_node))
+ else if (TYPE_PRECISION (TREE_TYPE (src))
+ == TYPE_PRECISION (long_long_integer_type_node)
+ || (TYPE_PRECISION (TREE_TYPE (src))
+ == 2 * TYPE_PRECISION (long_long_integer_type_node)))
fn = builtin_decl_implicit (BUILT_IN_POPCOUNTLL);
- /* ??? Support promoting char/short to int. */
if (!fn)
return false;
/* Update NITER params accordingly */
tree utype = unsigned_type_for (TREE_TYPE (src));
src = fold_convert (utype, src);
- tree call = fold_convert (utype, build_call_expr (fn, 1, src));
+ if (TYPE_PRECISION (TREE_TYPE (src)) < TYPE_PRECISION (integer_type_node))
+ src = fold_convert (unsigned_type_node, src);
+ tree call;
+ if (TYPE_PRECISION (TREE_TYPE (src))
+ == 2 * TYPE_PRECISION (long_long_integer_type_node))
+ {
+ int prec = TYPE_PRECISION (long_long_integer_type_node);
+ tree src1 = fold_convert (long_long_unsigned_type_node,
+ fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+ unshare_expr (src),
+ build_int_cst (integer_type_node,
+ prec)));
+ tree src2 = fold_convert (long_long_unsigned_type_node, src);
+ call = build_call_expr (fn, 1, src1);
+ call = fold_build2 (PLUS_EXPR, TREE_TYPE (call), call,
+ build_call_expr (fn, 1, src2));
+ call = fold_convert (utype, call);
+ }
+ else
+ call = fold_convert (utype, build_call_expr (fn, 1, src));
if (adjust)
- iter = fold_build2 (MINUS_EXPR, utype,
- call,
- build_int_cst (utype, 1));
+ iter = fold_build2 (MINUS_EXPR, utype, call, build_int_cst (utype, 1));
else
iter = call;
@@ -2703,10 +2721,9 @@ number_of_iterations_popcount (loop_p loop, edge exit,
if (adjust)
{
tree may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
- build_zero_cst
- (TREE_TYPE (src)));
- niter->may_be_zero =
- simplify_using_initial_conditions (loop, may_be_zero);
+ build_zero_cst (TREE_TYPE (src)));
+ niter->may_be_zero
+ = simplify_using_initial_conditions (loop, may_be_zero);
}
else
niter->may_be_zero = boolean_false_node;