aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2024-07-16 07:58:28 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2024-07-16 07:58:28 +0100
commitdf9451936c6c9e4faea371e3f188e1fc6b6d39e3 (patch)
treef47fe44bf0245d3b73bef1185c7061b01c58fdfe
parenta902e35396d68f10bd27477153fafa4f5ac9c319 (diff)
downloadgcc-df9451936c6c9e4faea371e3f188e1fc6b6d39e3.zip
gcc-df9451936c6c9e4faea371e3f188e1fc6b6d39e3.tar.gz
gcc-df9451936c6c9e4faea371e3f188e1fc6b6d39e3.tar.bz2
PR tree-optimization/114661: Generalize MULT_EXPR recognition in match.pd.
This patch resolves PR tree-optimization/114661, by generalizing the set of expressions that we canonicalize to multiplication. This extends the optimization(s) contributed (by me) back in July 2021. https://gcc.gnu.org/pipermail/gcc-patches/2021-July/575999.html The existing transformation folds (X*C1)^(X<<C2) into X*C3 when allowed. A subtlety is that for non-wrapping integer types, we actually fold this into (int)((unsigned)X*C3) so that we don't introduce an undefined overflow that wasn't in the original. Unfortunately, this transformation confuses itself, as the type-cast multiplication isn't recognized when further combining bit operations. Fixed here by allowing optional useless type conversions in transforms to turn (int)((unsigned)X*C1)^(X<<C2) into (int)((unsigned)X*C3) so that match.pd and EVRP can continue to construct multiplications. For the example given in the PR: unsigned mul(unsigned char c) { if (c > 3) __builtin_unreachable(); return c << 18 | c << 15 | c << 12 | c << 9 | c << 6 | c << 3 | c; } GCC on x86_64 with -O2 previously generated: mul: movzbl %dil, %edi leal (%rdi,%rdi,8), %edx leal 0(,%rdx,8), %eax movl %edx, %ecx sall $15, %edx orl %edi, %eax sall $9, %ecx orl %ecx, %eax orl %edx, %eax ret with this patch we now generate: mul: movzbl %dil, %eax imull $299593, %eax, %eax ret 2024-07-16 Roger Sayle <roger@nextmovesoftware.com> Richard Biener <rguenther@suse.de> gcc/ChangeLog PR tree-optimization/114661 * match.pd ((X*C1)|(X*C2) to X*(C1+C2)): Allow optional useless type conversions around multiplications, such as those inserted by this transformation. gcc/testsuite/ChangeLog PR tree-optimization/114661 * gcc.dg/pr114661.c: New test case.
-rw-r--r--gcc/match.pd43
-rw-r--r--gcc/testsuite/gcc.dg/pr114661.c10
2 files changed, 36 insertions, 17 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 3759c64..24a0bbe 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4171,30 +4171,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
Likewise, handle (X<<C3) and X as legitimate variants of X*C. */
(for op (bit_ior bit_xor)
(simplify
- (op (mult:s@0 @1 INTEGER_CST@2)
- (mult:s@3 @1 INTEGER_CST@4))
- (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
- && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
- (mult @1
- { wide_int_to_tree (type, wi::to_wide (@2) + wi::to_wide (@4)); })))
+ (op (nop_convert?:s@6 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
+ (nop_convert?:s@7 (mult:s@3 (nop_convert? @5) INTEGER_CST@4)))
+ (if (INTEGRAL_TYPE_P (type)
+ && operand_equal_p (@1, @5, 0)
+ && (tree_nonzero_bits (@6) & tree_nonzero_bits (@7)) == 0)
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t))
+ t = unsigned_type_for (t);
+ wide_int c = wi::add (wi::to_wide (@2), wi::to_wide (@4)); }
+ (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
(simplify
- (op:c (mult:s@0 @1 INTEGER_CST@2)
+ (op:c (nop_convert?:s@5 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
(lshift:s@3 @1 INTEGER_CST@4))
- (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
+ (if (INTEGRAL_TYPE_P (type)
&& tree_int_cst_sgn (@4) > 0
- && (tree_nonzero_bits (@0) & tree_nonzero_bits (@3)) == 0)
- (with { wide_int wone = wi::one (TYPE_PRECISION (type));
+ && (tree_nonzero_bits (@5) & tree_nonzero_bits (@3)) == 0)
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t))
+ t = unsigned_type_for (t);
+ wide_int wone = wi::one (TYPE_PRECISION (type));
wide_int c = wi::add (wi::to_wide (@2),
wi::lshift (wone, wi::to_wide (@4))); }
- (mult @1 { wide_int_to_tree (type, c); }))))
+ (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
(simplify
- (op:c (mult:s@0 @1 INTEGER_CST@2)
+ (op:c (nop_convert?:s@3 (mult:s@0 (nop_convert? @1) INTEGER_CST@2))
@1)
- (if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type)
- && (tree_nonzero_bits (@0) & tree_nonzero_bits (@1)) == 0)
- (mult @1
- { wide_int_to_tree (type,
- wi::add (wi::to_wide (@2), 1)); })))
+ (if (INTEGRAL_TYPE_P (type)
+ && (tree_nonzero_bits (@3) & tree_nonzero_bits (@1)) == 0)
+ (with { tree t = type;
+ if (!TYPE_OVERFLOW_WRAPS (t))
+ t = unsigned_type_for (t);
+ wide_int c = wi::add (wi::to_wide (@2), 1); }
+ (convert (mult:t (convert:t @1) { wide_int_to_tree (t, c); })))))
(simplify
(op (lshift:s@0 @1 INTEGER_CST@2)
(lshift:s@3 @1 INTEGER_CST@4))
diff --git a/gcc/testsuite/gcc.dg/pr114661.c b/gcc/testsuite/gcc.dg/pr114661.c
new file mode 100644
index 0000000..e6b5c69
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114661.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+unsigned mul(unsigned char c) {
+ if (c > 3) __builtin_unreachable();
+ return c << 18 | c << 15 |
+ c << 12 | c << 9 |
+ c << 6 | c << 3 | c;
+}
+/* { dg-final { scan-tree-dump-times " \\* 299593" 1 "evrp" } } */