diff options
author | Jiufu Guo <guojiufu@linux.ibm.com> | 2019-10-28 05:23:24 +0000 |
---|---|---|
committer | Jiufu Guo <guojiufu@gcc.gnu.org> | 2019-10-28 05:23:24 +0000 |
commit | 6d099a76a0f6a040a3e678f2bce7fc69cc3257d8 (patch) | |
tree | 3f86230269de0eff1807c5bee4129696db86647f /gcc | |
parent | cf20d00ca1ae5a0da9b329896d7b51e55381bdd7 (diff) | |
download | gcc-6d099a76a0f6a040a3e678f2bce7fc69cc3257d8.zip gcc-6d099a76a0f6a040a3e678f2bce7fc69cc3257d8.tar.gz gcc-6d099a76a0f6a040a3e678f2bce7fc69cc3257d8.tar.bz2 |
rs6000: Enable limited unrolling at -O2
In PR88760, there was some discussion about improving or tuning the unroller
for targets, and we agreed to enable the unroller for small loops at -O2 first.
We see a performance improvement (~10%) for the code below:
```
subroutine foo (i, i1, block)
integer :: i, i1
integer :: block(9, 9, 9)
block(i:9,1,i1) = block(i:9,1,i1) - 10
end subroutine foo
```
This kind of code occurs a few times in the exchange2 benchmark.
Similar C code:
```
for (i = 0; i < n; i++)
arr[i] = arr[i] - 10;
```
On powerpcle, at -O2, enabling -funroll-loops and limiting
PARAM_MAX_UNROLL_TIMES to 2 and PARAM_MAX_UNROLLED_INSNS to 20 gives a >2%
overall improvement on SPEC2017.
This patch applies only to rs6000, where we see a visible performance improvement.
gcc/
2019-10-25 Jiufu Guo <guojiufu@linux.ibm.com>
PR tree-optimization/88760
* config/rs6000/rs6000-common.c (rs6000_option_optimization_table):
Enable -funroll-loops for -O2 and above.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Set
PARAM_MAX_UNROLL_TIMES to 2 and PARAM_MAX_UNROLLED_INSNS to 20, and
do not turn on web and rename-registers implicitly, if the unroller is not
explicitly enabled.
gcc.testsuite/
2019-10-25 Jiufu Guo <guojiufu@linux.ibm.com>
PR tree-optimization/88760
* gcc.target/powerpc/small-loop-unroll.c: New test.
* c-c++-common/tsan/thread_leak2.c: Update test.
* gcc.dg/pr59643.c: Update test.
* gcc.target/powerpc/loop_align.c: Update test.
* gcc.target/powerpc/ppc-fma-1.c: Update test.
* gcc.target/powerpc/ppc-fma-2.c: Update test.
* gcc.target/powerpc/ppc-fma-3.c: Update test.
* gcc.target/powerpc/ppc-fma-4.c: Update test.
* gcc.target/powerpc/pr78604.c: Update test.
From-SVN: r277501
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/common/config/rs6000/rs6000-common.c | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/testsuite/c-c++-common/tsan/thread_leak2.c | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/pr59643.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/loop_align.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr78604.c | 2 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/small-loop-unroll.c | 13 |
13 files changed, 70 insertions, 6 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4751148..9511081 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2019-10-25 Jiufu Guo <guojiufu@linux.ibm.com> + + PR tree-optimization/88760 + * config/rs6000/rs6000-common.c (rs6000_option_optimization_table): + Enable -funroll-loops for -O2 and above. + * config/rs6000/rs6000.c (rs6000_option_override_internal): Set + PARAM_MAX_UNROLL_TIMES to 2 and PARAM_MAX_UNROLLED_INSNS to 20, and + do not turn on web and rngreg implicitly, if the unroller is not + explicitly enabled. + 2019-10-27 Jan Hubicka <hubicka@ucw.cz> * ipa-prop.c (ipa_propagate_indirect_call_infos): Do not remove diff --git a/gcc/common/config/rs6000/rs6000-common.c b/gcc/common/config/rs6000/rs6000-common.c index 4b0c205..b947196 100644 --- a/gcc/common/config/rs6000/rs6000-common.c +++ b/gcc/common/config/rs6000/rs6000-common.c @@ -35,6 +35,7 @@ static const struct default_options rs6000_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, /* Enable -fsched-pressure for first pass instruction scheduling. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, + { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 }, { OPT_LEVELS_NONE, 0, NULL, 0 } }; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 1399221..9ed5151 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4540,6 +4540,26 @@ rs6000_option_override_internal (bool global_init_p) global_options.x_param_values, global_options_set.x_param_values); + /* unroll very small loops 2 time if no -funroll-loops. 
*/ + if (!global_options_set.x_flag_unroll_loops + && !global_options_set.x_flag_unroll_all_loops) + { + maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, + global_options.x_param_values, + global_options_set.x_param_values); + + maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, + global_options.x_param_values, + global_options_set.x_param_values); + + /* If fweb or frename-registers are not specificed in command-line, + do not turn them on implicitly. */ + if (!global_options_set.x_flag_web) + global_options.x_flag_web = 0; + if (!global_options_set.x_flag_rename_registers) + global_options.x_flag_rename_registers = 0; + } + /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) can be optimized to ap = __builtin_next_arg (0). */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 86c2da1..f9f5bb7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2019-10-25 Jiufu Guo <guojiufu@linux.ibm.com> + + PR tree-optimization/88760 + * gcc.target/powerpc/small-loop-unroll.c: New test. + * c-c++-common/tsan/thread_leak2.c: Update test. + * gcc.dg/pr59643.c: Update test. + * gcc.target/powerpc/loop_align.c: Update test. + * gcc.target/powerpc/ppc-fma-1.c: Update test. + * gcc.target/powerpc/ppc-fma-2.c: Update test. + * gcc.target/powerpc/ppc-fma-3.c: Update test. + * gcc.target/powerpc/ppc-fma-4.c: Update test. + * gcc.target/powerpc/pr78604.c: Update test. + 2019-10-27 Andreas Tobler <andreast@gcc.gnu.org> * gcc.c-torture/execute/fprintf-2.c: Silence a Free/NetBSD libc warning. 
diff --git a/gcc/testsuite/c-c++-common/tsan/thread_leak2.c b/gcc/testsuite/c-c++-common/tsan/thread_leak2.c index c9b8046..082f2aa 100644 --- a/gcc/testsuite/c-c++-common/tsan/thread_leak2.c +++ b/gcc/testsuite/c-c++-common/tsan/thread_leak2.c @@ -1,5 +1,9 @@ /* { dg-shouldfail "tsan" } */ +/* { dg-additional-options "-fno-unroll-loops" { target { powerpc*-*-* } } } */ +/* -fno-unroll-loops help to avoid ThreadSanitizer reporting multi-times + message for pthread_create at difference calling addresses. */ + #include <pthread.h> #include <unistd.h> diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c index de78d60..4446f6e 100644 --- a/gcc/testsuite/gcc.dg/pr59643.c +++ b/gcc/testsuite/gcc.dg/pr59643.c @@ -1,6 +1,9 @@ /* PR tree-optimization/59643 */ /* { dg-do compile } */ /* { dg-options "-O3 -fdump-tree-pcom-details" } */ +/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */ +/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the + loop of this case. 
*/ void foo (double *a, double *b, double *c, double d, double e, int n) diff --git a/gcc/testsuite/gcc.target/powerpc/loop_align.c b/gcc/testsuite/gcc.target/powerpc/loop_align.c index ebe3782..ef67f77 100644 --- a/gcc/testsuite/gcc.target/powerpc/loop_align.c +++ b/gcc/testsuite/gcc.target/powerpc/loop_align.c @@ -1,6 +1,6 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* powerpc-ibm-aix* } } */ -/* { dg-options "-O2 -mdejagnu-cpu=power7 -falign-functions=16" } */ +/* { dg-options "-O2 -mdejagnu-cpu=power7 -falign-functions=16 -fno-unroll-loops" } */ /* { dg-final { scan-assembler ".p2align 5" } } */ void f(double *a, double *b, double *c, unsigned long n) { diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c index b4945e6..2a5b92c 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-1.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power7 -ffast-math" } */ +/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power7 -ffast-math -fno-unroll-loops" } */ /* { dg-final { scan-assembler-times "xvmadd" 4 } } */ /* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 2 } } */ /* { dg-final { scan-assembler-times "fmadds" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c index 5ed630a..bf2c67f 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-2.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power7 -ffast-math -ffp-contract=off" } */ +/* { dg-options "-O3 
-ftree-vectorize -mdejagnu-cpu=power7 -ffast-math -ffp-contract=off -fno-unroll-loops" } */ /* { dg-final { scan-assembler-times "xvmadd" 2 } } */ /* { dg-final { scan-assembler-times "xsmadd\|fmadd\ " 1 } } */ /* { dg-final { scan-assembler-times "fmadds" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c index ef252b3..8608116 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-3.c @@ -2,7 +2,7 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_altivec_ok } */ /* { dg-require-effective-target powerpc_fprs } */ -/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power6 -maltivec -ffast-math" } */ +/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power6 -maltivec -ffast-math -fno-unroll-loops" } */ /* { dg-final { scan-assembler-times "vmaddfp" 2 } } */ /* { dg-final { scan-assembler-times "fmadd " 2 } } */ /* { dg-final { scan-assembler-times "fmadds" 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c index c2eaf1a..291c2ee 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc-fma-4.c @@ -2,7 +2,7 @@ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_altivec_ok } */ /* { dg-require-effective-target powerpc_fprs } */ -/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power6 -maltivec -ffast-math -ffp-contract=off" } */ +/* { dg-options "-O3 -ftree-vectorize -mdejagnu-cpu=power6 -maltivec -ffast-math -ffp-contract=off -fno-unroll-loops" } */ /* { dg-final { scan-assembler-times "vmaddfp" 1 } } */ /* { dg-final { scan-assembler-times "fmadd " 1 } } */ /* { dg-final { scan-assembler-times "fmadds" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/pr78604.c b/gcc/testsuite/gcc.target/powerpc/pr78604.c index 76d8945..35bfdb3 100644 --- 
a/gcc/testsuite/gcc.target/powerpc/pr78604.c +++ b/gcc/testsuite/gcc.target/powerpc/pr78604.c @@ -1,7 +1,7 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ /* { dg-skip-if "" { powerpc*-*-darwin* } } */ /* { dg-require-effective-target powerpc_p8vector_ok } */ -/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize -fdump-tree-vect-details" } */ +/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize -fdump-tree-vect-details -fno-unroll-loops" } */ #ifndef SIZE #define SIZE 1024 diff --git a/gcc/testsuite/gcc.target/powerpc/small-loop-unroll.c b/gcc/testsuite/gcc.target/powerpc/small-loop-unroll.c new file mode 100644 index 0000000..fec5ae9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/small-loop-unroll.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-loop2_unroll" } */ + +void __attribute__ ((noinline)) foo(int n, int *arr) +{ + int i; + for (i = 0; i < n; i++) + arr[i] = arr[i] - 10; +} +/* { dg-final { scan-rtl-dump-times "Unrolled loop 1 times" 1 "loop2_unroll" } } */ +/* { dg-final { scan-assembler-times {\mlwz\M} 3 } } */ +/* { dg-final { scan-assembler-times {\mstw\M} 3 } } */ + |