diff options
author | Roger Sayle <roger@nextmovesoftware.com> | 2021-08-04 14:19:14 +0100 |
---|---|---|
committer | Roger Sayle <roger@nextmovesoftware.com> | 2021-08-04 14:22:51 +0100 |
commit | 96146e61cd7aee62c21c2845916ec42152918ab7 (patch) | |
tree | 99914e14e7b67d97aa6e4789f3159c9e4ad02058 /gcc/match.pd | |
parent | 0d04fe49239d91787850036599164788f1c87785 (diff) | |
download | gcc-96146e61cd7aee62c21c2845916ec42152918ab7.zip gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.gz gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.bz2 |
Fold (X<<C1)^(X<<C2) to a multiplication when possible.
The easiest way to motivate these additions to match.pd is with the
following example:
unsigned int foo(unsigned char i) {
return i | (i<<8) | (i<<16) | (i<<24);
}
which mainline with -O2 on x86_64 currently generates:
foo: movzbl %dil, %edi
movl %edi, %eax
movl %edi, %edx
sall $8, %eax
sall $16, %edx
orl %edx, %eax
orl %edi, %eax
sall $24, %edi
orl %edi, %eax
ret
but with this patch now becomes:
foo: movzbl %dil, %eax
imull $16843009, %eax, %eax
ret
Interestingly, this transformation is already applied when using
addition, allowing synth_mult to select an optimal sequence, but
not when using the equivalent bit-wise ior or xor operators.
The solution is to use tree_nonzero_bits to check that the
potentially non-zero bits of each operand don't overlap, which
ensures that BIT_IOR_EXPR and BIT_XOR_EXPR produce the same
results as PLUS_EXPR, which effectively generalizes the old
fold_plusminus_mult_expr. Technically, the transformation
is to canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to
X*(C1+C2) where X and X<<C are considered special cases.
2021-08-04 Roger Sayle <roger@nextmovesoftware.com>
Marc Glisse <marc.glisse@inria.fr>
gcc/ChangeLog
* match.pd (bit_ior, bit_xor): Canonicalize (X*C1)|(X*C2) and
(X*C1)^(X*C2) as X*(C1+C2), and related variants, using
tree_nonzero_bits to ensure that operands are bit-wise disjoint.
gcc/testsuite/ChangeLog
* gcc.dg/fold-ior-4.c: New test.
Diffstat (limited to 'gcc/match.pd')
-rw-r--r-- | gcc/match.pd | 56 |
1 files changed, 56 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd index 19cbad7..0fcfd0e 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -2833,6 +2833,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (convert (mult (convert:t @0) { cst; }))))) #endif +/* Canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to (C1+C2)*X when + tree_nonzero_bits allows IOR and XOR to be treated like PLUS. + Likewise, handle (X<<C3) and X as legitimate variants of X*C. */ +(for op (bit_ior bit_xor) + (simplify + (op (mult:s@0 @1 INTEGER_CST@2) + (mult:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (mult @1 + { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); }))) + (simplify + (op:c (mult:s@0 @1 INTEGER_CST@2) + (lshift:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && tree_int_cst_sgn (@4) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (with { wide_int wone = wi::one (TYPE_PRECISION (type)); + wide_int c = wi::add (wi::to_wide (@2), + wi::lshift (wone, wi::to_wide (@4))); } + (mult @1 { wide_int_to_tree (type, c); })))) + (simplify + (op:c (mult:s@0 @1 INTEGER_CST@2) + @1) + (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type) + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0) + (mult @1 + { wide_int_to_tree (type, + wi::add (wi::to_wide (@2), 1)); }))) + (simplify + (op (lshift:s@0 @1 INTEGER_CST@2) + (lshift:s@3 @1 INTEGER_CST@4)) + (if (INTEGRAL_TYPE_P (type) + && tree_int_cst_sgn (@2) > 0 + && tree_int_cst_sgn (@4) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) + t = unsigned_type_for (t); + wide_int wone = wi::one (TYPE_PRECISION (t)); + wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), + wi::lshift (wone, wi::to_wide (@4))); } + (convert (mult:t (convert:t @1) { wide_int_to_tree (t,c); }))))) + (simplify + (op:c (lshift:s@0 @1 INTEGER_CST@2) + @1) + (if (INTEGRAL_TYPE_P (type) + && tree_int_cst_sgn (@2) > 0 + && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0) + (with { tree t = type; + if (!TYPE_OVERFLOW_WRAPS (t)) + t = unsigned_type_for (t); + wide_int wone = wi::one (TYPE_PRECISION (t)); + wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), wone); } + (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))) + /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */ (for minmax (min max FMIN_ALL FMAX_ALL) |