diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2020-07-28 22:55:12 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2020-07-28 22:56:24 +0100 |
commit | 33bf56ddc6a757d2066a50dd9ce8323b379a2a0a (patch) | |
tree | c874ed6ad71a51bca6395d031550dd4595a65a8b /gcc | |
parent | f3665bd1111c1799c0421490b5e655f977570354 (diff) | |
download | gcc-33bf56ddc6a757d2066a50dd9ce8323b379a2a0a.zip gcc-33bf56ddc6a757d2066a50dd9ce8323b379a2a0a.tar.gz gcc-33bf56ddc6a757d2066a50dd9ce8323b379a2a0a.tar.bz2 |
middle-end: Parity and popcount folding optimizations.
This patch implements several constant folding optimizations
for __builtin_parity and friends. We canonicalize popcount(x)&1
as parity(x) in gimple, and potentially convert back again when
we expand to RTL. parity(~x) is simplified to parity(x), which
is true for all integer modes with an even number of bits.
But probably most usefully, parity(x)^parity(y) can be simplified
to a parity(x^y), requiring only a single libcall or popcount.
This patch optimizes popcount and parity of an argument known to have
at most a single bit set, to be that single bit. Hence, popcount(x&8)
is simplified to (x>>3)&1. This generalizes the existing optimization
of popcount(x&1) being simplified to x&1, which is cleaned up with
this patch.
2020-07-28 Roger Sayle <roger@nextmovesoftware.com>
Richard Biener <rguenther@suse.de>
gcc/ChangeLog
* match.pd (popcount(x)&1 -> parity(x)): New simplification.
(parity(~x) -> parity(x)): New simplification.
(parity(x)^parity(y) -> parity(x^y)): New simplification.
(parity(x&1) -> x&1): New simplification.
(popcount(x) -> x>>C): New simplification.
gcc/testsuite/ChangeLog
* gcc.dg/fold-popcount-5.c: New test.
* gcc.dg/fold-parity-1.c: Likewise.
* gcc.dg/fold-parity-2.c: Likewise.
* gcc.dg/fold-parity-3.c: Likewise.
* gcc.dg/fold-parity-4.c: Likewise.
* gcc.dg/fold-parity-5.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/match.pd | 52 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-1.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-2.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-3.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-4.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-parity-5.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/fold-popcount-5.c | 38 |
7 files changed, 196 insertions, 13 deletions
diff --git a/gcc/match.pd b/gcc/match.pd index c6ae7a7..a052c9e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -5964,25 +5964,51 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (IFN_FMA @0 @1 @2)))) /* POPCOUNT simplifications. */ -(for popcount (BUILT_IN_POPCOUNT BUILT_IN_POPCOUNTL BUILT_IN_POPCOUNTLL - BUILT_IN_POPCOUNTIMAX) - /* popcount(X&1) is nop_expr(X&1). */ - (simplify - (popcount @0) - (if (tree_nonzero_bits (@0) == 1) - (convert @0))) - /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */ - (simplify - (plus (popcount:s @0) (popcount:s @1)) - (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0) - (popcount (bit_ior @0 @1)))) - /* popcount(X) == 0 is X == 0, and related (in)equalities. */ +/* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero. */ +(simplify + (plus (POPCOUNT:s @0) (POPCOUNT:s @1)) + (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0) + (POPCOUNT (bit_ior @0 @1)))) + +/* popcount(X) == 0 is X == 0, and related (in)equalities. */ +(for popcount (POPCOUNT) (for cmp (le eq ne gt) rep (eq eq ne ne) (simplify (cmp (popcount @0) integer_zerop) (rep @0 { build_zero_cst (TREE_TYPE (@0)); })))) +/* Canonicalize POPCOUNT(x)&1 as PARITY(X). */ +(simplify + (bit_and (POPCOUNT @0) integer_onep) + (PARITY @0)) + +/* PARITY simplifications. */ +/* parity(~X) is parity(X). */ +(simplify + (PARITY (bit_not @0)) + (PARITY @0)) + +/* parity(X)^parity(Y) is parity(X^Y). */ +(simplify + (bit_xor (PARITY:s @0) (PARITY:s @1)) + (PARITY (bit_xor @0 @1))) + +/* Common POPCOUNT/PARITY simplifications. */ +/* popcount(X&C1) is (X>>C2)&1 when C1 == 1<<C2. Same for parity(X&C1). */ +(for pfun (POPCOUNT PARITY) + (simplify + (pfun @0) + (with { wide_int nz = tree_nonzero_bits (@0); } + (switch + (if (nz == 1) + (convert @0)) + (if (wi::popcount (nz) == 1) + (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); } + (convert (rshift:utype (convert:utype @0) + { build_int_cst (integer_type_node, + wi::ctz (nz)); })))))))) + #if GIMPLE /* 64- and 32-bits branchless implementations of popcount are detected: diff --git a/gcc/testsuite/gcc.dg/fold-parity-1.c b/gcc/testsuite/gcc.dg/fold-parity-1.c new file mode 100644 index 0000000..3ba56c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-original" } */ + +int foo(unsigned int x) +{ + return __builtin_popcount(x) & 1; +} + +int fool(unsigned long x) +{ + return __builtin_popcountl(x) & 1; +} + +int fooll(unsigned long long x) +{ + return __builtin_popcountll(x) & 1; +} + +/* { dg-final { scan-tree-dump-times "__builtin_popcount" 0 "original" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "original" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-2.c b/gcc/testsuite/gcc.dg/fold-parity-2.c new file mode 100644 index 0000000..8c7acbf --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-2.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ + return __builtin_parity(~x); +} + +int fool(unsigned long x) +{ + return __builtin_parityl(~x); +} + +int fooll(unsigned long long x) +{ + return __builtin_parityll(~x); +} + +/* { dg-final { scan-tree-dump-times "~" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-3.c b/gcc/testsuite/gcc.dg/fold-parity-3.c new file mode 100644 index 0000000..e0355cc --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x) +{ + return __builtin_parity(x&1); +} + +int fool(unsigned long x) +{ + return __builtin_parityl(x&1); +} + +int fooll(unsigned long long x) +{ + return __builtin_parityll(x&1); +} + +/* { dg-final { scan-tree-dump-times "__builtin_parity" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-4.c b/gcc/testsuite/gcc.dg/fold-parity-4.c new file mode 100644 index 0000000..5dfedab --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int foo(unsigned int x, unsigned int y) +{ + return __builtin_parity(x) ^ __builtin_parity(y); +} + +int fool(unsigned long x, unsigned long y) +{ + return __builtin_parityl(x) ^ __builtin_parityl(y); +} + +int fooll(unsigned long long x, unsigned long long y) +{ + return __builtin_parityll(x) ^ __builtin_parityll(y); +} + +/* { dg-final { scan-tree-dump-times "__builtin_parity" 3 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-parity-5.c b/gcc/testsuite/gcc.dg/fold-parity-5.c new file mode 100644 index 0000000..69d3a6a --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-parity-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int test_and4(unsigned int a) +{ + return __builtin_parity(a&4); +} + +int test_and4l(unsigned long b) +{ + return __builtin_parityl(b&4); +} + +int test_and4ll(unsigned long long c) +{ + return __builtin_parityll(c&4); +} + +int test_shift(unsigned int d) +{ + int bits = 8*sizeof(unsigned int)-1; + return __builtin_parity(d<<31); +} + +int test_shiftl(unsigned long e) +{ + int bits = 8*sizeof(unsigned long)-1; + return __builtin_parityl(e<<bits); +} + +int test_shiftll(unsigned long long f) +{ + int bits = 8*sizeof(unsigned long long)-1; + return __builtin_parityll(f<<bits); +} + +/* { dg-final { scan-tree-dump-times "parity" 0 "optimized" } } */ + diff --git a/gcc/testsuite/gcc.dg/fold-popcount-5.c b/gcc/testsuite/gcc.dg/fold-popcount-5.c new file mode 100644 index 0000000..943726f --- /dev/null +++ b/gcc/testsuite/gcc.dg/fold-popcount-5.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-optimized" } */ + +int test_and4(unsigned int a) +{ + return __builtin_popcount(a&4); +} + +int test_and4l(unsigned long b) +{ + return __builtin_popcountl(b&4); +} + +int test_and4ll(unsigned long long c) +{ + return __builtin_popcountll(c&4); +} + +int test_shift(unsigned int d) +{ + int bits = 8*sizeof(unsigned int)-1; + return __builtin_popcount(d<<31); +} + +int test_shiftl(unsigned long e) +{ + int bits = 8*sizeof(unsigned long)-1; + return __builtin_popcountl(e<<bits); +} + +int test_shiftll(unsigned long long f) +{ + int bits = 8*sizeof(unsigned long long)-1; + return __builtin_popcountll(f<<bits); +} + +/* { dg-final { scan-tree-dump-times "popcount" 0 "optimized" } } */ + |