diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 14
-rw-r--r-- | gcc/common/config/rs6000/rs6000-common.c | 9
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 47
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 4
-rw-r--r-- | gcc/testsuite/ChangeLog | 5
-rw-r--r-- | gcc/testsuite/gcc.dg/pr59643.c | 3
6 files changed, 59 insertions, 23 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d34890b7..96efa42 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,17 @@ +2019-11-11 Jiufu Guo <guojiufu@linux.ibm.com> + + PR tree-optimization/88760 + * gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option. + * gcc/common/config/rs6000/rs6000-common.c + (rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]: + Turn on -funroll-loops and -munroll-only-small-loops. + [OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers. + * config/rs6000/rs6000.c (rs6000_option_override_internal): Remove + set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS. + Turn off -munroll-only-small-loops for explicit -funroll-loops. + (TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook. + (rs6000_loop_unroll_adjust): Define it. Use -munroll-only-small-loops. + 2019-11-11 Kewen Lin <linkw@gcc.gnu.org> * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): diff --git a/gcc/common/config/rs6000/rs6000-common.c b/gcc/common/config/rs6000/rs6000-common.c index b947196..9dc7ae8 100644 --- a/gcc/common/config/rs6000/rs6000-common.c +++ b/gcc/common/config/rs6000/rs6000-common.c @@ -35,7 +35,14 @@ static const struct default_options rs6000_option_optimization_table[] = { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, /* Enable -fsched-pressure for first pass instruction scheduling. */ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 }, + /* Enable -munroll-only-small-loops with -funroll-loops to unroll small + loops at -O2 and above by default. */ + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 }, + { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 }, + /* -fweb and -frename-registers are useless in general for rs6000, + turn them off. 
*/ + { OPT_LEVELS_ALL, OPT_fweb, NULL, 0 }, + { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 }, { OPT_LEVELS_NONE, 0, NULL, 0 } }; diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 6e67db7..5f776f8 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] = #undef TARGET_VECTORIZE_DESTROY_COST_DATA #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust + #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins #undef TARGET_BUILTIN_DECL @@ -4540,25 +4543,12 @@ rs6000_option_override_internal (bool global_init_p) global_options.x_param_values, global_options_set.x_param_values); - /* unroll very small loops 2 time if no -funroll-loops. */ - if (!global_options_set.x_flag_unroll_loops - && !global_options_set.x_flag_unroll_all_loops) - { - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, - global_options.x_param_values, - global_options_set.x_param_values); - - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20, - global_options.x_param_values, - global_options_set.x_param_values); - - /* If fweb or frename-registers are not specificed in command-line, - do not turn them on implicitly. */ - if (!global_options_set.x_flag_web) - global_options.x_flag_web = 0; - if (!global_options_set.x_flag_rename_registers) - global_options.x_flag_rename_registers = 0; - } + /* Explicit -funroll-loops turns -munroll-only-small-loops off. 
*/ + if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops) + || (global_options_set.x_flag_unroll_all_loops + && flag_unroll_all_loops)) + && !global_options_set.x_unroll_only_small_loops) + unroll_only_small_loops = 0; /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) can be optimized to @@ -5105,6 +5095,25 @@ rs6000_destroy_cost_data (void *data) free (data); } +/* Implement targetm.loop_unroll_adjust. */ + +static unsigned +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop) +{ + if (unroll_only_small_loops) + { + /* TODO: This is hardcoded to 10 right now. It can be refined, for + example we may want to unroll very small loops more times (4 perhaps). + We also should use a PARAM for this. */ + if (loop->ninsns <= 10) + return MIN (2, nunroll); + else + return 0; + } + + return nunroll; +} + /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a library with vectorized intrinsics. */ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 1f37a92..387d3cf 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -501,6 +501,10 @@ moptimize-swaps Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save Analyze and remove doubleword swaps from VSX computations. +munroll-only-small-loops +Target Undocumented Var(unroll_only_small_loops) Init(0) Save +; Use conservative small loop unrolling. + mpower9-misc Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags) Use certain scalar instructions added in ISA 3.0. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 34a31f0..cc60856 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2019-11-11 Jiufu Guo <guojiufu@linux.ibm.com> + + PR tree-optimization/88760 + * gcc.dg/pr59643.c: Update back to r277550. 
+ 2019-11-10 Paul Thomas <pault@gcc.gnu.org> PR fortran/92123 diff --git a/gcc/testsuite/gcc.dg/pr59643.c b/gcc/testsuite/gcc.dg/pr59643.c index 4446f6e..de78d60 100644 --- a/gcc/testsuite/gcc.dg/pr59643.c +++ b/gcc/testsuite/gcc.dg/pr59643.c @@ -1,9 +1,6 @@ /* PR tree-optimization/59643 */ /* { dg-do compile } */ /* { dg-options "-O3 -fdump-tree-pcom-details" } */ -/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */ -/* Implicit threashold of max-unrolled-insn on ppc at O3 is too small for the - loop of this case. */ void foo (double *a, double *b, double *c, double d, double e, int n) |