aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2022-08-15 17:32:26 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2022-08-15 17:32:26 +0100
commit03acd8b6429e22068330dce5abf129291d3f26de (patch)
tree13582c7a6f978a641fa653a6b108f86f41b9b47f
parentd2d189985d1f9fa09802c4a14856d442786f4bf8 (diff)
downloadgcc-03acd8b6429e22068330dce5abf129291d3f26de.zip
gcc-03acd8b6429e22068330dce5abf129291d3f26de.tar.gz
gcc-03acd8b6429e22068330dce5abf129291d3f26de.tar.bz2
PR tree-optimization/71343: Optimize (X<<C)&(Y<<C) as (X&Y)<<C.
This patch is the first part of a solution to PR tree-optimization/71343, a missed-optimization enhancement request where GCC fails to see that (a<<2)+(b<<2) == a*4+b*4. This piece is that (X<<C) op (Y<<C) can be simplified to (X op Y) << C, for many binary operators, including AND, IOR, XOR, and (if overflow isn't an issue) PLUS and MINUS. Likewise, the right shifts (both logical and arithmetic) and bit-wise logical operators can be simplified in a similar fashion. These all reduce the number of GIMPLE binary operations from 3 to 2, by combining/eliminating a shift operation. 2022-08-15 Roger Sayle <roger@nextmovesoftware.com> Richard Biener <rguenther@suse.de> gcc/ChangeLog PR tree-optimization/71343 * match.pd (op (lshift @0 @1) (lshift @2 @1)): Optimize the expression (X<<C) + (Y<<C) to (X+Y)<<C for multiple operators. (op (rshift @0 @1) (rshift @2 @1)): Likewise, simplify (X>>C)^(Y>>C) to (X^Y)>>C for binary logical operators, AND, IOR and XOR. gcc/testsuite/ChangeLog PR tree-optimization/71343 * gcc.dg/pr71343-1.c: New test case.
-rw-r--r--gcc/match.pd20
-rw-r--r--gcc/testsuite/gcc.dg/pr71343-1.c56
2 files changed, 76 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index c22bc2c..e7d10f4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -982,6 +982,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& tree_nop_conversion_p (type, TREE_TYPE (@1)))
(lshift @0 @2)))
+/* Shifts by constants distribute over several binary operations,
+ hence (X << C) + (Y << C) can be simplified to (X + Y) << C. */
+(for op (plus minus)
+ (simplify
+ (op (lshift:s @0 @1) (lshift:s @2 @1))
+ (if (INTEGRAL_TYPE_P (type)
+ && TYPE_OVERFLOW_WRAPS (type)
+ && !TYPE_SATURATING (type))
+ (lshift (op @0 @2) @1))))
+
+(for op (bit_and bit_ior bit_xor)
+ (simplify
+ (op (lshift:s @0 @1) (lshift:s @2 @1))
+ (if (INTEGRAL_TYPE_P (type))
+ (lshift (op @0 @2) @1)))
+ (simplify
+ (op (rshift:s @0 @1) (rshift:s @2 @1))
+ (if (INTEGRAL_TYPE_P (type))
+ (rshift (op @0 @2) @1))))
+
/* Fold (1 << (C - x)) where C = precision(type) - 1
into ((1 << C) >> x). */
(simplify
diff --git a/gcc/testsuite/gcc.dg/pr71343-1.c b/gcc/testsuite/gcc.dg/pr71343-1.c
new file mode 100644
index 0000000..146f5fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71343-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo_plus(unsigned int a, unsigned int b)
+{
+ return (a << 2) + (b << 2);
+}
+
+unsigned int foo_and(unsigned int a, unsigned int b)
+{
+ return (a << 2) & (b << 2);
+}
+
+unsigned int foo_ior(unsigned int a, unsigned int b)
+{
+ return (a << 2) | (b << 2);
+}
+
+unsigned int foo_xor(unsigned int a, unsigned int b)
+{
+ return (a << 2) ^ (b << 2);
+}
+
+unsigned int bar_and(unsigned int a, unsigned int b)
+{
+ return (a >> 2) & (b >> 2);
+}
+
+unsigned int bar_ior(unsigned int a, unsigned int b)
+{
+ return (a >> 2) | (b >> 2);
+}
+
+unsigned int bar_xor(unsigned int a, unsigned int b)
+{
+ return (a >> 2) ^ (b >> 2);
+}
+
+int baz_and(int a, int b)
+{
+ return (a >> 2) & (b >> 2);
+}
+
+int baz_ior(int a, int b)
+{
+ return (a >> 2) | (b >> 2);
+}
+
+int baz_xor(int a, int b)
+{
+ return (a >> 2) ^ (b >> 2);
+}
+
+/* { dg-final { scan-tree-dump-times " << " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " >> " 6 "optimized" } } */
+