author     Jakub Jelinek <jakub@redhat.com>  2020-05-05 11:36:47 +0200
committer  Jakub Jelinek <jakub@redhat.com>  2020-05-05 11:36:47 +0200
commit     144aee70b80de50f96a97ee64edd2f1c237c4906 (patch)
tree       a0b80710604088be014e425ee8ef0cb9a64cf5bc /gcc
parent     7f916201ac390a5e1c88562bb91b1b4ab2852f22 (diff)
match.pd: Canonicalize (x + (x << cst)) into (x * cst2) [PR94800]
The popcount* testcases show yet another creative way to write popcount,
but rather than adjusting the popcount matcher to deal with it, I think
we should just canonicalize X + (X << C) to X * (1 + (1 << C)) and
(X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2)), because for
multiplication we already have simplification rules that can handle
nested multiplication (X * CST1 * CST2), while for the shifts and adds
we have nothing like that.  The user could have written the
multiplication form anyway, so if we don't emit the fastest or smallest
code for multiplication by a constant, we should improve that.  At least
on the testcases the emitted code seems reasonable according to cost,
except that perhaps we could in some cases try to improve expansion of
vector multiplication by a uniform constant.

2020-05-05  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/94800
	* match.pd (X + (X << C) to X * (1 + (1 << C)),
	(X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New
	canonicalizations.

	* gcc.dg/tree-ssa/pr94800.c: New test.
	* gcc.dg/tree-ssa/popcount5.c: New test.
	* gcc.dg/tree-ssa/popcount5l.c: New test.
	* gcc.dg/tree-ssa/popcount5ll.c: New test.
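As a quick standalone check of the two identities being canonicalized
(an illustrative sketch, not part of the patch; the shift counts and
test values are arbitrary): unsigned arithmetic wraps, so both sides
agree modulo 2^64 for every input.

#include <assert.h>

int
main (void)
{
  unsigned long long tests[] = { 0, 1, 42, 0xdeadbeefULL, ~0ULL };
  for (unsigned i = 0; i < sizeof tests / sizeof tests[0]; i++)
    {
      unsigned long long x = tests[i];
      /* X + (X << C) == X * (1 + (1 << C)), here with C == 8.  */
      assert (x + (x << 8) == x * (1ULL + (1ULL << 8)));
      /* (X << C1) + (X << C2) == X * ((1 << C1) + (1 << C2)),
	 here with C1 == 2 and C2 == 10.  */
      assert ((x << 2) + (x << 10) == x * ((1ULL << 2) + (1ULL << 10)));
    }
  return 0;
}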
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                                |  5
-rw-r--r--  gcc/match.pd                                 | 35
-rw-r--r--  gcc/testsuite/ChangeLog                      |  6
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/popcount5.c    | 22
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c   | 32
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c  | 22
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr94800.c      | 80
7 files changed, 202 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b796203..cc076c8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
2020-05-05 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/94800
+ * match.pd (X + (X << C) to X * (1 + (1 << C)),
+ (X << C1) + (X << C2) to X * ((1 << C1) + (1 << C2))): New
+ canonicalizations.
+
PR target/94942
* config/i386/mmx.md (*vec_dupv4hi): Use xYw constraints instead of Yv.
diff --git a/gcc/match.pd b/gcc/match.pd
index c45e94c..d957517 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2570,6 +2570,41 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& single_use (@3))
(mult (plusminus @2 { build_one_cst (type); }) @0))))))
+#if GIMPLE
+/* Canonicalize X + (X << C) into X * (1 + (1 << C)) and
+ (X << C1) + (X << C2) into X * ((1 << C1) + (1 << C2)). */
+(simplify
+ (plus:c @0 (lshift:s @0 INTEGER_CST@1))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1),
+ element_precision (type));
+ w += 1;
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+(simplify
+ (plus (lshift:s @0 INTEGER_CST@1) (lshift:s @0 INTEGER_CST@2))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < element_precision (type)
+ && tree_fits_uhwi_p (@2)
+ && tree_to_uhwi (@2) < element_precision (type))
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t)) t = unsigned_type_for (t);
+ unsigned int prec = element_precision (type);
+ wide_int w = wi::set_bit_in_zero (tree_to_uhwi (@1), prec);
+ w += wi::set_bit_in_zero (tree_to_uhwi (@2), prec);
+ tree cst = wide_int_to_tree (VECTOR_TYPE_P (t) ? TREE_TYPE (t)
+ : t, w);
+ cst = build_uniform_cst (t, cst); }
+ (convert (mult (convert:t @0) { cst; })))))
+#endif
+
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
(for minmax (min max FMIN_ALL FMAX_ALL)
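An illustrative note on the new patterns above (my reading, not from
the commit): wi::set_bit_in_zero (C, prec) builds the wide_int value
1 << C at the type's precision, so the first pattern forms the
multiplier 1 + (1 << C) and the second (1 << C1) + (1 << C2); and when
the original type has undefined overflow (!TYPE_OVERFLOW_WRAPS), the
multiplication is carried out in the corresponding unsigned type and
converted back, which avoids introducing new signed-overflow UB.  A
plain-C sketch of the multiplier computation for the scalar case, with
hypothetical function names:

/* Scalar equivalent of the wide_int arithmetic in the first pattern
   (hypothetical helper, for illustration only).  */
unsigned long long
multiplier_for_plus_shift (unsigned c)
{
  return 1ULL + (1ULL << c);	/* e.g. c == 8 gives 257.  */
}

/* ... and in the second pattern.  */
unsigned long long
multiplier_for_two_shifts (unsigned c1, unsigned c2)
{
  return (1ULL << c1) + (1ULL << c2);	/* e.g. c1 == 2, c2 == 10 gives 1028.  */
}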
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6475b7d..2b48741 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2020-05-05 Jakub Jelinek <jakub@redhat.com>
+ PR tree-optimization/94800
+ * gcc.dg/tree-ssa/pr94800.c: New test.
+ * gcc.dg/tree-ssa/popcount5.c: New test.
+ * gcc.dg/tree-ssa/popcount5l.c: New test.
+ * gcc.dg/tree-ssa/popcount5ll.c: New test.
+
PR target/94942
* gcc.target/i386/pr94942.c: New test.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c
new file mode 100644
index 0000000..6358d4f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5.c
@@ -0,0 +1,22 @@
+/* PR tree-optimization/94800 */
+/* { dg-do compile } */
+/* { dg-require-effective-target popcount } */
+/* { dg-require-effective-target int32plus } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned m1 = 0x55555555UL;
+const unsigned m2 = 0x33333333UL;
+const unsigned m4 = 0x0F0F0F0FUL;
+const int shift = 24;
+
+int popcount64c(unsigned x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += (x << 8);
+ x += (x << 16);
+ return x >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
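Why this testcase now matches (my reading; not part of the test file):
the two trailing shift/add lines canonicalize to x * 0x101 and then
x * 0x10001, and the existing X * CST1 * CST2 folding collapses those
to x * 0x01010101 (257 * 65537 == 16843009), i.e. the well-known
multiplication form of popcount that the matcher already recognizes:

/* Multiplication form of the same popcount, shown for comparison only.  */
int
popcount32_mul (unsigned x)
{
  x -= (x >> 1) & 0x55555555;
  x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
  x = (x + (x >> 4)) & 0x0F0F0F0F;
  return (x * 0x01010101) >> 24;	/* Top byte holds the bit count.  */
}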
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c
new file mode 100644
index 0000000..b761667
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5l.c
@@ -0,0 +1,32 @@
+/* PR tree-optimization/94800 */
+/* { dg-do compile { target int32plus } } */
+/* { dg-require-effective-target popcountl } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#if __SIZEOF_LONG__ == 4
+const unsigned long m1 = 0x55555555UL;
+const unsigned long m2 = 0x33333333UL;
+const unsigned long m4 = 0x0F0F0F0FUL;
+const int shift = 24;
+#else
+const unsigned long m1 = 0x5555555555555555UL;
+const unsigned long m2 = 0x3333333333333333UL;
+const unsigned long m4 = 0x0f0f0f0f0f0f0f0fUL;
+const int shift = 56;
+#endif
+
+
+int popcount64c(unsigned long x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += (x << 8);
+ x += (x << 16);
+#if __SIZEOF_LONG__ != 4
+ x += (x << 32);
+#endif
+ return x >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
new file mode 100644
index 0000000..831d5e17
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/popcount5ll.c
@@ -0,0 +1,22 @@
+/* PR tree-optimization/94800 */
+/* { dg-do compile } */
+/* { dg-require-effective-target popcountll } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+const unsigned long long m1 = 0x5555555555555555ULL;
+const unsigned long long m2 = 0x3333333333333333ULL;
+const unsigned long long m4 = 0x0F0F0F0F0F0F0F0FULL;
+const int shift = 56;
+
+int popcount64c(unsigned long long x)
+{
+ x -= (x >> 1) & m1;
+ x = (x & m2) + ((x >> 2) & m2);
+ x = (x + (x >> 4)) & m4;
+ x += (x << 8);
+ x += (x << 16);
+ x += (x << 32);
+ return x >> shift;
+}
+
+/* { dg-final { scan-tree-dump-times "\.POPCOUNT" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c
new file mode 100644
index 0000000..4f92df3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94800.c
@@ -0,0 +1,80 @@
+/* PR tree-optimization/94800 */
+/* { dg-do compile { target { ilp32 || lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \* 72340172838076673" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \* 16843009" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \* 289360691352306692" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \* 1229782938247303441" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */
+
+unsigned long long int
+foo (unsigned long long int x)
+{
+ unsigned long long int a = x + (x << 8);
+ unsigned long long int b = a + (a << 16);
+ unsigned long long int c = b + (b << 32);
+ return c;
+}
+
+unsigned int
+bar (unsigned int x)
+{
+ unsigned int a = x + (x << 8);
+ unsigned int b = a + (a << 16);
+ return b;
+}
+
+unsigned long long int
+baz (unsigned long long int x)
+{
+ unsigned long long int a = (x << 2) + (x << 10);
+ unsigned long long int b = a + (a << 16);
+ unsigned long long int c = b + (b << 32);
+ return c;
+}
+
+unsigned long long int
+qux (unsigned long long int x)
+{
+ unsigned long long int a = x + (x << 4);
+ unsigned long long int b = a + (a << 8);
+ unsigned long long int c = b + (b << 16);
+ unsigned long long int d = c + (c << 32);
+ return d;
+}
+
+long long int
+quux (long long int x)
+{
+ long long int a = x + (x << 8);
+ long long int b = a + (a << 16);
+ long long int c = b + (b << 32);
+ return c;
+}
+
+int
+corge (int x)
+{
+ int a = x + (x << 8);
+ int b = a + (a << 16);
+ return b;
+}
+
+long long int
+garply (long long int x)
+{
+ long long int a = (x << 2) + (x << 10);
+ long long int b = a + (a << 16);
+ long long int c = b + (b << 32);
+ return c;
+}
+
+long long int
+waldo (long long int x)
+{
+ long long int a = x + (x << 4);
+ long long int b = a + (a << 8);
+ long long int c = b + (b << 16);
+ long long int d = c + (c << 32);
+ return d;
+}
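For reference, the multipliers that the scan-tree-dump directives above
look for follow from the new canonicalization combined with the
existing nested-multiplication folding (my arithmetic, not part of the
test file):

/* foo/quux:   (1 + 2^8) * (1 + 2^16) * (1 + 2^32)
		 = 0x0101010101010101 = 72340172838076673
   bar/corge:  (1 + 2^8) * (1 + 2^16)
		 = 0x01010101 = 16843009
   baz/garply: (2^2 + 2^10) * (1 + 2^16) * (1 + 2^32)
		 = 0x0404040404040404 = 289360691352306692
   qux/waldo:  (1 + 2^4) * (1 + 2^8) * (1 + 2^16) * (1 + 2^32)
		 = 0x1111111111111111 = 1229782938247303441  */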