Fold (X<<C1)^(X<<C2) to a multiplication when possible.

The easiest way to motivate these additions to match.pd is with the
following example:

unsigned int foo(unsigned char i) {
  return i | (i<<8) | (i<<16) | (i<<24);
}

which mainline with -O2 on x86_64 currently generates:
foo:    movzbl  %dil, %edi
        movl    %edi, %eax
        movl    %edi, %edx
        sall    $8, %eax
        sall    $16, %edx
        orl     %edx, %eax
        orl     %edi, %eax
        sall    $24, %edi
        orl     %edi, %eax
        ret

but with this patch now becomes:
foo:    movzbl  %dil, %eax
        imull   $16843009, %eax, %eax
        ret

Interestingly, this transformation is already applied when using
addition, allowing synth_mult to select an optimal sequence, but
not when using the equivalent bit-wise ior or xor operators.

The solution is to use tree_nonzero_bits to check that the
potentially non-zero bits of each operand don't overlap, which
ensures that BIT_IOR_EXPR and BIT_XOR_EXPR produce the same
results as PLUS_EXPR, which effectively generalizes the old
fold_plusminus_mult_expr.  Technically, the transformation
is to canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to
X*(C1+C2) where X and X<<C are considered special cases.

2021-08-04  Roger Sayle  <roger@nextmovesoftware.com>
            Marc Glisse  <marc.glisse@inria.fr>

gcc/ChangeLog
        * match.pd (bit_ior, bit_xor): Canonicalize (X*C1)|(X*C2) and
        (X*C1)^(X*C2) as X*(C1+C2), and related variants, using
        tree_nonzero_bits to ensure that operands are bit-wise disjoint.

gcc/testsuite/ChangeLog
        * gcc.dg/fold-ior-4.c: New test.
author: Roger Sayle <roger@nextmovesoftware.com> 2021-08-04 14:19:14 +0100
committer: Roger Sayle <roger@nextmovesoftware.com> 2021-08-04 14:22:51 +0100
commit: 96146e61cd7aee62c21c2845916ec42152918ab7 (patch)
tree: 99914e14e7b67d97aa6e4789f3159c9e4ad02058 /gcc/match.pd
parent: 0d04fe49239d91787850036599164788f1c87785 (diff)
download: gcc-96146e61cd7aee62c21c2845916ec42152918ab7.zip
gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.gz
gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.bz2
1 files changed, 56 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 19cbad7..0fcfd0e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2833,6 +2833,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
     (convert (mult (convert:t @0) { cst; })))))
 #endif
 
+/* Canonicalize (X*C1)|(X*C2) and (X*C1)^(X*C2) to (C1+C2)*X when
+   tree_nonzero_bits allows IOR and XOR to be treated like PLUS.
+   Likewise, handle (X<<C3) and X as legitimate variants of X*C.  */
+(for op (bit_ior bit_xor)
+ (simplify  /* (X*C1) op (X*C2) -> X*(C1+C2).  */
+  (op (mult:s@0 @1 INTEGER_CST@2)
+      (mult:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); })))
+ (simplify  /* (X*C1) op (X<<C2) -> X*(C1+(1<<C2)), either order.  */
+  (op:c (mult:s@0 @1 INTEGER_CST@2)
+	(lshift:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && tree_int_cst_sgn (@4) > 0  /* Shift count must be positive.  */
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (with { wide_int wone = wi::one (TYPE_PRECISION (type));
+	   wide_int c = wi::add (wi::to_wide (@2),
+				 wi::lshift (wone, wi::to_wide (@4))); }
+    (mult @1 { wide_int_to_tree (type, c); }))))
+ (simplify  /* (X*C) op X -> X*(C+1), either order.  */
+  (op:c (mult:s@0 @1 INTEGER_CST@2)
+	@1)
+  (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
+   (mult @1
+	 { wide_int_to_tree (type,
+			     wi::add (wi::to_wide (@2), 1)); })))
+ (simplify  /* (X<<C1) op (X<<C2) -> X*((1<<C1)+(1<<C2)).  */
+  (op (lshift:s@0 @1 INTEGER_CST@2)
+      (lshift:s@3 @1 INTEGER_CST@4))
+  (if (INTEGRAL_TYPE_P (type)
+       && tree_int_cst_sgn (@2) > 0  /* Both shift counts positive.  */
+       && tree_int_cst_sgn (@4) > 0
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
+   (with { tree t = type;
+	   if (!TYPE_OVERFLOW_WRAPS (t))  /* Else multiply as unsigned.  */
+	     t = unsigned_type_for (t);
+	   wide_int wone = wi::one (TYPE_PRECISION (t));
+	   wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)),
+				 wi::lshift (wone, wi::to_wide (@4))); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t,c); })))))
+ (simplify  /* (X<<C) op X -> X*((1<<C)+1), either order.  */
+  (op:c (lshift:s@0 @1 INTEGER_CST@2)
+	@1)
+  (if (INTEGRAL_TYPE_P (type)
+       && tree_int_cst_sgn (@2) > 0  /* Shift count must be positive.  */
+       && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
+   (with { tree t = type;
+	   if (!TYPE_OVERFLOW_WRAPS (t))  /* Else multiply as unsigned.  */
+	     t = unsigned_type_for (t);
+	   wide_int wone = wi::one (TYPE_PRECISION (t));
+	   wide_int c = wi::add (wi::lshift (wone, wi::to_wide (@2)), wone); }
+    (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); }))))))
+
 /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax().  */
 
 (for minmax (min max FMIN_ALL FMAX_ALL)
author	Roger Sayle <roger@nextmovesoftware.com>	2021-08-04 14:19:14 +0100
committer	Roger Sayle <roger@nextmovesoftware.com>	2021-08-04 14:22:51 +0100
commit	96146e61cd7aee62c21c2845916ec42152918ab7 (patch)
tree	99914e14e7b67d97aa6e4789f3159c9e4ad02058 /gcc/match.pd
parent	0d04fe49239d91787850036599164788f1c87785 (diff)
download	gcc-96146e61cd7aee62c21c2845916ec42152918ab7.zip gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.gz gcc-96146e61cd7aee62c21c2845916ec42152918ab7.tar.bz2