diff options
author | Xionghu Luo <luoxhu@linux.ibm.com> | 2020-05-14 21:03:24 -0500 |
---|---|---|
committer | Xionghu Luo <luoxhu@linux.ibm.com> | 2020-05-14 21:06:50 -0500 |
commit | 8a15faa730f99100f6f3ed12663563356ec5a2c0 (patch) | |
tree | 534a282a2cf01a9b3f1dca437eb0480bfa5aefd7 /gcc | |
parent | 98aad12cd2e618286a36fed9bc870f19bdbc0f07 (diff) | |
download | gcc-8a15faa730f99100f6f3ed12663563356ec5a2c0.zip gcc-8a15faa730f99100f6f3ed12663563356ec5a2c0.tar.gz gcc-8a15faa730f99100f6f3ed12663563356ec5a2c0.tar.bz2 |
Fold (add -1; zero_ext; add +1) operations to zero_ext when not overflowing (PR37451, PR61837)
This "subtract/extend/add" sequence has existed for a long time and is still
annoying us (PR37451, part of PR61837) when converting from 32 bits to 64 bits,
as the ctr register is used as 64 bits on powerpc64. Andrew Pinski had a patch,
but it caused some issues and was reverted by Joseph S. Myers (PR37451, PR37782).
Andrew:
http://gcc.gnu.org/ml/gcc-patches/2008-09/msg01070.html
http://gcc.gnu.org/ml/gcc-patches/2008-10/msg01321.html
Joseph:
https://gcc.gnu.org/legacy-ml/gcc-patches/2011-11/msg02405.html
We can still simplify "subtract/zero_ext/add" to "zero_ext" when the number
of loop iterations is known to be less than MODE_MAX (i.e. only simplify
when counter+0x1 does NOT overflow).
Bootstrapped and regression tested on Power8-LE.
gcc/ChangeLog
2020-05-15 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR rtl-optimization/37451, part of PR target/61837
* loop-doloop.c (doloop_simplify_count): New function. Simplify
(add -1; zero_ext; add +1) to zero_ext when not wrapping.
(doloop_modify): Call doloop_simplify_count.
gcc/testsuite/ChangeLog
2020-05-15 Xiong Hu Luo <luoxhu@linux.ibm.com>
PR rtl-optimization/37451, part of PR target/61837
* gcc.target/powerpc/doloop-2.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/loop-doloop.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/doloop-2.c | 29 |
4 files changed, 78 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d855ee6..c7b080f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2020-05-14 Xiong Hu Luo <luoxhu@linux.ibm.com> + + PR rtl-optimization/37451, part of PR target/61837 + * loop-doloop.c (doloop_simplify_count): New function. Simplify + (add -1; zero_ext; add +1) to zero_ext when not wrapping. + (doloop_modify): Call doloop_simplify_count. + 2020-05-14 H.J. Lu <hongjiu.lu@intel.com> PR jit/94778 diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c index db6a014..02282d4 100644 --- a/gcc/loop-doloop.c +++ b/gcc/loop-doloop.c @@ -397,6 +397,42 @@ add_test (rtx cond, edge *e, basic_block dest) return true; } +/* Fold (add -1; zero_ext; add +1) operations to zero_ext if not wrapping. i.e: + + 73: r145:SI=r123:DI#0-0x1 + 74: r144:DI=zero_extend (r145:SI) + 75: r143:DI=r144:DI+0x1 + ... + 31: r135:CC=cmp (r123:DI,0) + 72: {pc={(r143:DI!=0x1)?L70:pc};r143:DI=r143:DI-0x1;...} + + r123:DI#0-0x1 is param count derived from loop->niter_expr equal to number of + loop iterations, if loop iterations expression doesn't overflow, then + (zero_extend (r123:DI#0-1))+1 can be simplified to zero_extend. 
*/ + +static rtx +doloop_simplify_count (class loop *loop, scalar_int_mode mode, rtx count) +{ + widest_int iterations; + if (GET_CODE (count) == ZERO_EXTEND) + { + rtx extop0 = XEXP (count, 0); + if (GET_CODE (extop0) == PLUS) + { + rtx addop0 = XEXP (extop0, 0); + rtx addop1 = XEXP (extop0, 1); + + if (get_max_loop_iterations (loop, &iterations) + && wi::ltu_p (iterations, GET_MODE_MASK (GET_MODE (addop0))) + && addop1 == constm1_rtx) + return simplify_gen_unary (ZERO_EXTEND, mode, addop0, + GET_MODE (addop0)); + } + } + + return simplify_gen_binary (PLUS, mode, count, const1_rtx); +} + /* Modify the loop to use the low-overhead looping insn where LOOP describes the loop, DESC describes the number of iterations of the loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the @@ -477,7 +513,7 @@ doloop_modify (class loop *loop, class niter_desc *desc, } if (increment_count) - count = simplify_gen_binary (PLUS, mode, count, const1_rtx); + count = doloop_simplify_count (loop, mode, count); /* Insert initialization of the count register into the loop header. */ start_sequence (); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 8dfdfc6..e2ee69f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-05-14 Xiong Hu Luo <luoxhu@linux.ibm.com> + + PR rtl-optimization/37451, part of PR target/61837 + * gcc.target/powerpc/doloop-2.c: New test. + 2020-05-14 H.J. 
Lu <hongjiu.lu@intel.com> PR jit/94778 diff --git a/gcc/testsuite/gcc.target/powerpc/doloop-2.c b/gcc/testsuite/gcc.target/powerpc/doloop-2.c new file mode 100644 index 0000000..3199fe5 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/doloop-2.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fno-unroll-loops" } */ + +unsigned int +foo1 (unsigned int l, int *a) +{ + unsigned int i; + for(i = 0;i < l; i++) + a[i] = i; + return l; +} + +int +foo2 (int l, int *a) +{ + int i; + for(i = 0;i < l; i++) + a[i] = i; + return l; +} + +/* The place where we were getting an extra -1 is when converting from 32bits + to 64bits as the ctr register is used as 64bits on powerpc64. We should be + able to do this loop without "add -1/zero_ext/add 1" to the l to get the + number of iterations of this loop still doing a do-loop. */ + +/* { dg-final { scan-assembler-not {(?n)\maddi .*,.*,-1$} } } */ +/* { dg-final { scan-assembler-times "bdnz" 2 } } */ +/* { dg-final { scan-assembler-times "mtctr" 2 } } */ |