diff options
author | Zhao Wei Liew <zhaoweiliew@gmail.com> | 2022-01-28 13:36:39 -0500 |
---|---|---|
committer | Jeff Law <jeffreyalaw@gmail.com> | 2022-01-28 13:36:39 -0500 |
commit | c2b610e7c6c89fd422c5c31f01023bcddf3cf4a5 (patch) | |
tree | 5f0d362b89de28da00f3b2adeeb8d10dfa1e98d3 | |
parent | a591c71b41e18e4ff86852a974592af4962aef57 (diff) | |
download | gcc-c2b610e7c6c89fd422c5c31f01023bcddf3cf4a5.zip gcc-c2b610e7c6c89fd422c5c31f01023bcddf3cf4a5.tar.gz gcc-c2b610e7c6c89fd422c5c31f01023bcddf3cf4a5.tar.bz2 |
match.pd: Simplify 1 / X for integer X [PR95424]
This patch implements an optimization for the following C++ code:
int f(int x) {
return 1 / x;
}
int f(unsigned int x) {
return 1 / x;
}
Before this patch, x86-64 gcc -std=c++20 -O3 produces the following assembly:
f(int):
xor edx, edx
mov eax, 1
idiv edi
ret
f(unsigned int):
xor edx, edx
mov eax, 1
div edi
ret
In comparison, clang++ -std=c++20 -O3 produces the following assembly:
f(int):
lea ecx, [rdi + 1]
xor eax, eax
cmp ecx, 3
cmovb eax, edi
ret
f(unsigned int):
xor eax, eax
cmp edi, 1
sete al
ret
Clang's output is more efficient as it avoids expensive div operations.
With this patch, GCC now produces the following assembly:
f(int):
lea eax, [rdi + 1]
cmp eax, 2
mov eax, 0
cmovbe eax, edi
ret
f(unsigned int):
xor eax, eax
cmp edi, 1
sete al
ret
which is virtually identical to Clang's assembly output. Any slight differences
in the output for f(int) is possibly related to a different missed optimization.
v2: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587751.html
Changes from v2:
1. Refactor from using a switch statement to using the built-in
if-else statement.
v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587634.html
Changes from v1:
1. Refactor common if conditions.
2. Use build_[minus_]one_cst (type) to get -1/1 of the correct type.
3. Match only for TRUNC_DIV_EXPR and TYPE_PRECISION (type) > 1.
gcc/ChangeLog:
PR tree-optimization/95424
* match.pd: Simplify 1 / X where X is an integer.
-rw-r--r-- | gcc/match.pd | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/divide-6.c | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/divide-7.c | 9 |
3 files changed, 31 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd index c68eed7..bd76da6 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -435,6 +435,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) && TYPE_UNSIGNED (type)) (trunc_divmod @0 @1)))) + /* 1 / X -> X == 1 for unsigned integer X. + 1 / X -> X >= -1 && X <= 1 ? X : 0 for signed integer X. + But not for 1 / 0 so that we can get proper warnings and errors, + and not for 1-bit integers as they are edge cases better handled elsewhere. */ +(simplify + (trunc_div integer_onep@0 @1) + (if (INTEGRAL_TYPE_P (type) && !integer_zerop (@1) && TYPE_PRECISION (type) > 1) + (if (TYPE_UNSIGNED (type)) + (eq @1 { build_one_cst (type); }) + (with { tree utype = unsigned_type_for (type); } + (cond (le (plus (convert:utype @1) { build_one_cst (utype); }) { build_int_cst (utype, 2); }) + @1 { build_zero_cst (type); }))))) + /* Combine two successive divisions. Note that combining ceil_div and floor_div is trickier and combining round_div even more so. */ (for div (trunc_div exact_div) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c new file mode 100644 index 0000000..a9fc4c0 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-6.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +unsigned int f(unsigned int x) { + return 1 / x; +} + +/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */ +/* { dg-final { scan-tree-dump "x_..D. == 1;" "optimized" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c new file mode 100644 index 0000000..285279a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/divide-7.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ + +int f(int x) { + return 1 / x; +} + +/* { dg-final { scan-tree-dump-not "1 / x_..D.;" "optimized" } } */ +/* { dg-final { scan-tree-dump ".. <= 2 ? x_..D. : 0;" "optimized" } } */ |