about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author	Richard Sandiford <richard.sandiford@arm.com>	2024-10-24 14:22:33 +0100
committer	Richard Sandiford <richard.sandiford@arm.com>	2024-10-24 14:22:33 +0100
commit	af19e46c88fd75e31127cde239b8f28d8f9c4040 (patch)
tree	c4d249e8c6d45031aacf92945a07cc5d8501abd8 /gcc
parent	d6c4badffafa295f6082b7d74de314e131f30a96 (diff)
download	gcc-af19e46c88fd75e31127cde239b8f28d8f9c4040.zip
gcc-af19e46c88fd75e31127cde239b8f28d8f9c4040.tar.gz
gcc-af19e46c88fd75e31127cde239b8f28d8f9c4040.tar.bz2
Try to simplify (X >> C1) * (C2 << C1) -> X * C2
This patch adds a rule to simplify (X >> C1) * (C2 << C1) -> X * C2
when the low C1 bits of X are known to be zero.  As with the earlier
X >> C1 << (C2 + C1) patch, any single conversion is allowed between
the shift and the multiplication.

gcc/
	* match.pd: Simplify (X >> C1) * (C2 << C1) -> X * C2 if the
	low C1 bits of X are zero.

gcc/testsuite/
	* gcc.dg/tree-ssa/shifts-3.c: New test.
	* gcc.dg/tree-ssa/shifts-4.c: Likewise.
	* gcc.target/aarch64/sve/cnt_fold_5.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/match.pd13
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c65
-rw-r--r--gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c23
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c38
4 files changed, 139 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 391c60b..148d0bc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4919,6 +4919,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& wi::to_widest (@2) >= wi::to_widest (@1)
&& wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0)))
(lshift (convert @0) (minus @2 @1))))
+
+/* (X >> C1) * (C2 << C1) -> X * C2 if the low C1 bits of X are zero. */
+(simplify
+ (mult (convert? (rshift (with_possible_nonzero_bits2 @0) INTEGER_CST@1))
+ poly_int_tree_p@2)
+ (with { poly_widest_int factor; }
+ (if (INTEGRAL_TYPE_P (type)
+ && wi::ltu_p (wi::to_wide (@1), element_precision (type))
+ && wi::to_widest (@1) <= wi::ctz (get_nonzero_bits (@0))
+ && multiple_p (wi::to_poly_widest (@2),
+ widest_int (1) << tree_to_uhwi (@1),
+ &factor))
+ (mult (convert @0) { wide_int_to_tree (type, factor); }))))
#endif
/* For (x << c) >> c, optimize into x & ((unsigned)-1 >> c) for
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
new file mode 100644
index 0000000..dcff518
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-3.c
@@ -0,0 +1,65 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 2;
+ return x * 20;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ unsigned char y = x;
+ y >>= 2;
+ return y * 36;
+}
+
+unsigned long
+f3 (unsigned int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 2;
+ return (unsigned long) x * 88;
+}
+
+int
+f4 (int x)
+{
+ if (x & 15)
+ __builtin_unreachable ();
+ x >>= 4;
+ return x * 48;
+}
+
+unsigned int
+f5 (int x)
+{
+ if (x & 31)
+ __builtin_unreachable ();
+ x >>= 5;
+ return x * 3200;
+}
+
+unsigned int
+f6 (unsigned int x)
+{
+ if (x & 1)
+ __builtin_unreachable ();
+ x >>= 1;
+ return x * (~0U / 3 & -2);
+}
+
+/* { dg-final { scan-tree-dump-not {<[a-z]*_div_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump-not {<rshift_expr,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 5,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 9,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<(?:widen_)?mult_expr, [^,]*, [^,]*, 22,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 3,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 100,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 715827882,} "optimized" { target int32 } } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
new file mode 100644
index 0000000..5638653
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/shifts-4.c
@@ -0,0 +1,23 @@
+/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+
+unsigned int
+f1 (unsigned int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 2;
+ return x * 10;
+}
+
+unsigned int
+f2 (unsigned int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 3;
+ return x * 24;
+}
+
+/* { dg-final { scan-tree-dump-times {<rshift_expr,} 2 "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 10,} "optimized" } } */
+/* { dg-final { scan-tree-dump {<mult_expr, [^,]*, [^,]*, 24,} "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
new file mode 100644
index 0000000..3f60e9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnt_fold_5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** f1:
+** ...
+** cntd [^\n]+
+** ...
+** mul [^\n]+
+** ret
+*/
+uint64_t
+f1 (int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 2;
+ return (uint64_t) x * svcnth ();
+}
+
+/*
+** f2:
+** ...
+** asr [^\n]+
+** ...
+** ret
+*/
+uint64_t
+f2 (int x)
+{
+ if (x & 3)
+ __builtin_unreachable ();
+ x >>= 2;
+ return (uint64_t) x * svcntw ();
+}