diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2023-05-11 08:10:04 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2023-05-11 08:10:04 +0100 |
commit | c0dd80e4c4c332ed8c65dd96528cc2dc9e9e5ef7 (patch) | |
tree | d38ac71917fbbd86b13e0f51feb49d9054e75e22 | |
parent | 003f388cddbbd3c8ff936f98355a90acda619030 (diff) | |
download | gcc-c0dd80e4c4c332ed8c65dd96528cc2dc9e9e5ef7.zip gcc-c0dd80e4c4c332ed8c65dd96528cc2dc9e9e5ef7.tar.gz gcc-c0dd80e4c4c332ed8c65dd96528cc2dc9e9e5ef7.tar.bz2 |
match.pd: Simplify popcount/parity of bswap/rotate.
This is the latest iteration of my patch from August 2020
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552391.html
incorporating feedback and suggestions from reviewers.
This patch to match.pd optimizes away bit permutation operations,
specifically bswap and rotate, in calls to popcount and parity.
2023-05-11 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* match.pd <popcount optimizations>: Simplify popcount(bswap(x))
as popcount(x). Simplify popcount(rotate(x,y)) as popcount(x).
<parity optimizations>: Simplify parity(bswap(x)) as parity(x).
Simplify parity(rotate(x,y)) as parity(x).
gcc/testsuite/ChangeLog
* gcc.dg/fold-parity-6.c: New test.
* gcc.dg/fold-parity-7.c: Likewise.
* gcc.dg/fold-popcount-6.c: Likewise.
* gcc.dg/fold-popcount-7.c: Likewise.
-rw-r--r-- | gcc/match.pd | 50 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-6.c | 37 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-7.c | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-popcount-6.c | 37 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-popcount-7.c | 43 |
5 files changed, 210 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd index ceae1c3..bc083be 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -7766,6 +7766,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) +/* popcount(bswap(x)) is popcount(x). */ +(for popcount (POPCOUNT) + (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32 + BUILT_IN_BSWAP64 BUILT_IN_BSWAP128) + (simplify + (popcount (convert?@0 (bswap:s@1 @2))) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@1))) + (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); } + (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1))) + (popcount @2))))))) + +/* popcount(rotate(X Y)) is popcount(X). */ +(for popcount (POPCOUNT) + (for rot (lrotate rrotate) + (simplify + (popcount (convert?@0 (rot:s@1 @2 @3))) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && (GIMPLE || !TREE_SIDE_EFFECTS (@3))) + (with { unsigned int prec0 = TYPE_PRECISION (TREE_TYPE (@0)); + unsigned int prec1 = TYPE_PRECISION (TREE_TYPE (@1)); } + (if (prec0 == prec1 || (prec0 > prec1 && TYPE_UNSIGNED (@1))) + (popcount @2))))))) + /* Canonicalize POPCOUNT(x)&1 as PARITY(X). */ (simplify (bit_and (POPCOUNT @0) integer_onep) @@ -7777,6 +7803,30 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (PARITY (bit_not @0)) (PARITY @0)) +/* parity(bswap(x)) is parity(x). */ +(for parity (PARITY) + (for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32 + BUILT_IN_BSWAP64 BUILT_IN_BSWAP128) + (simplify + (parity (convert?@0 (bswap:s@1 @2))) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && TYPE_PRECISION (TREE_TYPE (@0)) + >= TYPE_PRECISION (TREE_TYPE (@1))) + (parity @2))))) + +/* parity(rotate(X Y)) is parity(X). 
*/ +(for parity (PARITY) + (for rot (lrotate rrotate) + (simplify + (parity (convert?@0 (rot:s@1 @2 @3))) + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) + && INTEGRAL_TYPE_P (TREE_TYPE (@1)) + && (GIMPLE || !TREE_SIDE_EFFECTS (@3)) + && TYPE_PRECISION (TREE_TYPE (@0)) + >= TYPE_PRECISION (TREE_TYPE (@1))) + (parity @2))))) + /* parity(X)^parity(Y) is parity(X^Y). */ (simplify (bit_xor (PARITY:s @0) (PARITY:s @1)) diff --git a/gcc/testsuite/gcc.dg/fold-parity-6.c b/gcc/testsuite/gcc.dg/fold-parity-6.c new file mode 100644 index 0000000..a407597 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-6.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ +#if __SIZEOF_INT__ == 4 + return __builtin_parity (__builtin_bswap32(x)); +#elif __SIZEOF_INT__ == 2 + return __builtin_parity (__builtin_bswap16(x)); +#else + return x; +#endif +} + +int bar(unsigned long x) +{ +#if __SIZEOF_LONG__ == 8 + return __builtin_parityl (__builtin_bswap64(x)); +#elif __SIZEOF_LONG__ == 4 + return __builtin_parityl (__builtin_bswap32(x)); +#else + return x; +#endif +} + +int baz(unsigned long long x) +{ +#if __SIZEOF_LONG_LONG__ == 8 + return __builtin_parityll (__builtin_bswap64(x)); +#elif __SIZEOF_LONG_LONG__ == 4 + return __builtin_parityll (__builtin_bswap32(x)); +#else + return x; +#endif +} + +/* { dg-final { scan-tree-dump-not "bswap" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-parity-7.c b/gcc/testsuite/gcc.dg/fold-parity-7.c new file mode 100644 index 0000000..3b117da --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-7.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ +#if __SIZEOF_INT__ == 4 + unsigned int y = (x>>4) | (x<<28); + return __builtin_parity(y); +#elif __SIZEOF_INT__ == 2 + unsigned int y = (x>>4) | (x<<12); + return __builtin_parity(y); +#else + return x; +#endif +} + +int bar(unsigned long x) +{ +#if __SIZEOF_LONG__ 
== 8 + unsigned long y = (x>>4) | (x<<60); + return __builtin_parityl (y); +#elif __SIZEOF_LONG__ == 4 + unsigned long y = (x>>4) | (x<<28); + return __builtin_parityl (y); +#else + return x; +#endif +} + +int baz(unsigned long long x) +{ +#if __SIZEOF_LONG_LONG__ == 8 + unsigned long long y = (x>>4) | (x<<60); + return __builtin_parityll (y); +#elif __SIZEOF_LONG_LONG__ == 4 + unsigned long long y = (x>>4) | (x<<28); + return __builtin_parityll (y); +#else + return x; +#endif +} + +/* { dg-final { scan-tree-dump-not " r>> " "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-popcount-6.c b/gcc/testsuite/gcc.dg/fold-popcount-6.c new file mode 100644 index 0000000..7e67c6e --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-6.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ +#if __SIZEOF_INT__ == 4 + return __builtin_popcount (__builtin_bswap32(x)); +#elif __SIZEOF_INT__ == 2 + return __builtin_popcount (__builtin_bswap16(x)); +#else + return x; +#endif +} + +int bar(unsigned long x) +{ +#if __SIZEOF_LONG__ == 8 + return __builtin_popcountl (__builtin_bswap64(x)); +#elif __SIZEOF_LONG__ == 4 + return __builtin_popcountl (__builtin_bswap32(x)); +#else + return x; +#endif +} + +int baz(unsigned long long x) +{ +#if __SIZEOF_LONG_LONG__ == 8 + return __builtin_popcountll (__builtin_bswap64(x)); +#elif __SIZEOF_LONG_LONG__ == 4 + return __builtin_popcountll (__builtin_bswap32(x)); +#else + return x; +#endif +} + +/* { dg-final { scan-tree-dump-not "bswap" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/fold-popcount-7.c b/gcc/testsuite/gcc.dg/fold-popcount-7.c new file mode 100644 index 0000000..141137e6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-7.c @@ -0,0 +1,43 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ +#if __SIZEOF_INT__ == 4 + unsigned int y = (x>>4) | (x<<28); + return __builtin_popcount(y); +#elif 
__SIZEOF_INT__ == 2 + unsigned int y = (x>>4) | (x<<12); + return __builtin_popcount(y); +#else + return x; +#endif +} + +int bar(unsigned long x) +{ +#if __SIZEOF_LONG__ == 8 + unsigned long y = (x>>4) | (x<<60); + return __builtin_popcountl (y); +#elif __SIZEOF_LONG__ == 4 + unsigned long y = (x>>4) | (x<<28); + return __builtin_popcountl (y); +#else + return x; +#endif +} + +int baz(unsigned long long x) +{ +#if __SIZEOF_LONG_LONG__ == 8 + unsigned long long y = (x>>4) | (x<<60); + return __builtin_popcountll (y); +#elif __SIZEOF_LONG_LONG__ == 4 + unsigned long long y = (x>>4) | (x<<28); + return __builtin_popcountll (y); +#else + return x; +#endif +} + +/* { dg-final { scan-tree-dump-not " r>> " "optimized" } } */ |