aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2025-01-27 10:22:28 +0100
committerJakub Jelinek <jakub@gcc.gnu.org>2025-01-27 10:22:28 +0100
commit92a5c5100c25190622ca86b63586a598952546bf (patch)
tree5c9f7f02578ba68b4dd2ba2fe6af12d6b9214283
parentd8a7f07f7cf008e359dad631aaae1028776b9e18 (diff)
downloadgcc-92a5c5100c25190622ca86b63586a598952546bf.zip
gcc-92a5c5100c25190622ca86b63586a598952546bf.tar.gz
gcc-92a5c5100c25190622ca86b63586a598952546bf.tar.bz2
match.pd: Canonicalize unsigned division by power of two into right shift [PR118637]
We already do this canonicalization in simplify_using_ranges::simplify_div_or_mod_using_ranges, but that means that it is not done at -O1 or when vrp is otherwise disabled, and that it can be done too late in some cases when e.g. the r8-2064 "X / C1 op C2 into a simple range test." optimization triggers first. Note, for unsigned modulo we already have (simplify (mod @0 (convert? (power_of_two_cand@1 @2))) (if ((TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)) ... optimization which duplicates what simplify_using_ranges::simplify_div_or_mod_using_ranges does in case ranges aren't needed. For GCC 16 I think we should improve the niters pattern recognition and handle even what r8-2064 comes with, after all as I've tried to show in the PR the user could have written it that way. I've guarded this optimization on #if GIMPLE just in case this would stand in any way to the various divmult etc. simplification, guess that can be lifted for GCC 16 too. In the modulo case we also handle unsigned % (power_of_two << n), but not really sure if we could do that for the division, because unsigned / (power_of_two << n) is not simple unsigned >> (log2 (power_of_two) + n), one can shift the bit out and then it becomes just 0. 2025-01-27 Jakub Jelinek <jakub@redhat.com> PR tree-optimization/118637 * match.pd: Canonicalize unsigned division by power of two to right shift. * gcc.dg/tree-ssa/pr118637.c: New test.
-rw-r--r--gcc/match.pd9
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/pr118637.c22
2 files changed, 31 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index efc82d7..6991868 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -965,6 +965,15 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(bit_and @0 (negate @1))))
(for div (trunc_div ceil_div floor_div round_div exact_div)
+#if GIMPLE
+ /* Canonicalize unsigned t / 4 to t >> 2. */
+ (simplify
+ (div @0 integer_pow2p@1)
+ (if (INTEGRAL_TYPE_P (type)
+ && (TYPE_UNSIGNED (type) || tree_expr_nonnegative_p (@0)))
+ (rshift @0 { build_int_cst (integer_type_node,
+ wi::exact_log2 (wi::to_wide (@1))); })))
+#endif
/* Simplify (t * u) / u -> t. */
(simplify
(div (mult:c @0 @1) @1)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr118637.c b/gcc/testsuite/gcc.dg/tree-ssa/pr118637.c
new file mode 100644
index 0000000..d612b1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr118637.c
@@ -0,0 +1,22 @@
+/* PR tree-optimization/118637 */
+/* { dg-do compile { target clz } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "__builtin_clz|\\.CLZ" 2 "optimized" } } */
+
+__attribute__((noipa)) unsigned
+foo (unsigned x)
+{
+ unsigned result = 0;
+ while (x /= 2)
+ ++result;
+ return result;
+}
+
+__attribute__((noipa)) unsigned
+bar (unsigned x)
+{
+ unsigned result = 0;
+ while (x >>= 1)
+ ++result;
+ return result;
+}