diff options
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 3 | ||||
-rw-r--r-- | gcc/gcse.c | 178 | ||||
-rw-r--r-- | gcc/params.def | 6 | ||||
-rw-r--r-- | gcc/params.h | 2 |
5 files changed, 97 insertions, 101 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 04b6507..8b1a41a 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2009-04-12 Steven Bosscher <steven@gcc.gnu.org> + + * doc/invoke.texi (max_gcse_passes): Remove documentation. + * params.def (PARAM_MAX_GCSE_PASSES): Remove. + * params.h (MAX_GCSE_PASSES): Remove. + * gcse.c (gcse_main): Run CPROP1, PRE or HOIST, and CPROP2 + in sequence. Remove ability to run multiple passes. + (bypass_jumps): Report run as third CPROP pass. + 2009-04-12 Adam Nemet <anemet@caviumnetworks.com> PR middle-end/39651 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a09ce75..c5f8dfb 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -7330,9 +7330,6 @@ order to perform the global common subexpression elimination optimization. If more memory than specified is required, the optimization will not be done. -@item max-gcse-passes -The maximum number of passes of GCSE to run. The default is 1. - @item max-pending-list-length The maximum number of pending dependencies scheduling will allow before flushing the current state and starting over. Large functions @@ -190,16 +190,18 @@ along with GCC; see the file COPYING3. If not see We perform the following steps: - 1) Compute basic block information. + 1) Compute table of places where registers are set. - 2) Compute table of places where registers are set. + 2) Perform copy/constant propagation. - 3) Perform copy/constant propagation. - - 4) Perform global cse using lazy code motion if not optimizing + 3) Perform global cse using lazy code motion if not optimizing for size, or code hoisting if we are. - 5) Perform another pass of copy/constant propagation. + 4) Perform another pass of copy/constant propagation. Try to bypass + conditional jumps if the condition can be computed from a value of + an incoming edge. + + 5) Perform store motion. Two passes of copy/constant propagation are done because the first one enables more GCSE and the second one helps to clean up the copies that @@ -212,6 +214,11 @@ along with GCC; see the file COPYING3. If not see (set (pseudo-reg) (expression)). Function want_to_gcse_p says what these are. + In addition, expressions in REG_EQUAL notes are candidates for GXSE-ing. + This allows PRE to hoist expressions that are expressed in multiple insns, + such as comprex address calculations (e.g. for PIC code, or loads with a + high part and as lowe part). + PRE handles moving invariant expressions out of loops (by treating them as partially redundant). @@ -235,9 +242,13 @@ along with GCC; see the file COPYING3. If not see It was found doing copy propagation between each pass enables further substitutions. + This study was done before expressions in REG_EQUAL notes were added as + candidate expressions for optimization, and before the GIMPLE optimizers + were added. Probably, multiple passes is even less efficient now than + at the time when the study was conducted. + PRE is quite expensive in complicated functions because the DFA can take - a while to converge. Hence we only perform one pass. The parameter - max-gcse-passes can be modified if one wants to experiment. + a while to converge. Hence we only perform one pass. ********************** @@ -661,11 +672,7 @@ static bool is_too_expensive (const char *); static int gcse_main (rtx f ATTRIBUTE_UNUSED) { - int changed, pass; - /* Bytes used at start of pass. */ - int initial_bytes_used; - /* Maximum number of bytes used by a pass. */ - int max_pass_bytes; + int changed; /* Point to release obstack data from for each pass. */ char *gcse_obstack_bottom; @@ -697,6 +704,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED) /* We need alias. */ init_alias_analysis (); + /* Record where pseudo-registers are set. This data is kept accurate during each pass. ??? We could also record hard-reg information here [since it's unchanging], however it is currently done during hash table @@ -704,99 +712,86 @@ gcse_main (rtx f ATTRIBUTE_UNUSED) It may be tempting to compute MEM set information here too, but MEM sets will be subject to code motion one day and thus we need to compute - information about memory sets when we build the hash tables. */ + information about memory sets when we build the hash tables. + + ??? Actually, we already know the information that compute_sets computes + because it is available from DF. FIXME. */ alloc_reg_set_mem (max_gcse_regno); compute_sets (); - pass = 0; - initial_bytes_used = bytes_used; - max_pass_bytes = 0; gcse_obstack_bottom = GOBNEWVAR (char, 1); - changed = 1; - while (changed && pass < MAX_GCSE_PASSES) - { - changed = 0; - if (dump_file) - fprintf (dump_file, "GCSE pass %d\n\n", pass + 1); - - /* Initialize bytes_used to the space for the pred/succ lists, - and the reg_set_table data. */ - bytes_used = initial_bytes_used; + changed = 0; + + if (dump_file) + fprintf (dump_file, "GCSE pass\n\n"); - /* Each pass may create new registers, so recalculate each time. */ - max_gcse_regno = max_reg_num (); + max_gcse_regno = max_reg_num (); - alloc_gcse_mem (); + alloc_gcse_mem (); - /* Don't allow constant propagation to modify jumps - during this pass. */ - if (dbg_cnt (cprop1)) - { - timevar_push (TV_CPROP1); - changed = one_cprop_pass (pass + 1, false, false); - timevar_pop (TV_CPROP1); - } + /* Don't allow constant propagation to modify jumps + during this pass. */ + if (dbg_cnt (cprop1)) + { + timevar_push (TV_CPROP1); + changed = one_cprop_pass (1, false, false); + timevar_pop (TV_CPROP1); + } - if (optimize_function_for_speed_p (cfun)) + if (optimize_function_for_speed_p (cfun)) + { + timevar_push (TV_PRE); + changed |= one_pre_gcse_pass (1); + /* We may have just created new basic blocks. Release and + recompute various things which are sized on the number of + basic blocks. + ??? There would be no need for this if we used a block + based Lazy Code Motion variant, with all (or selected) + edges split before running the pass. That would also + help find_implicit_sets for cprop. FIXME. */ + if (changed) { - timevar_push (TV_PRE); - changed |= one_pre_gcse_pass (pass + 1); - /* We may have just created new basic blocks. Release and - recompute various things which are sized on the number of - basic blocks. */ - if (changed) - { - free_modify_mem_tables (); - modify_mem_list = GCNEWVEC (rtx, last_basic_block); - canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block); - } - free_reg_set_mem (); - alloc_reg_set_mem (max_reg_num ()); - compute_sets (); - run_jump_opt_after_gcse = 1; - timevar_pop (TV_PRE); + free_modify_mem_tables (); + modify_mem_list = GCNEWVEC (rtx, last_basic_block); + canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block); } - if (max_pass_bytes < bytes_used) - max_pass_bytes = bytes_used; - - /* Free up memory, then reallocate for code hoisting. We can - not re-use the existing allocated memory because the tables - will not have info for the insns or registers created by - partial redundancy elimination. */ - free_gcse_mem (); - - /* It does not make sense to run code hoisting unless we are optimizing + /* ??? When we allocate this at the start of the function, + the comment says that "this data is kept accurate during + each pass". Apparently this is not so? FIXME. */ + free_reg_set_mem (); + alloc_reg_set_mem (max_reg_num ()); + compute_sets (); + run_jump_opt_after_gcse = 1; + timevar_pop (TV_PRE); + } + else + { + /* This function is being optimized for code size. + It does not make sense to run code hoisting unless we are optimizing for code size -- it rarely makes programs faster, and can make them bigger if we did partial redundancy elimination (when optimizing for space, we don't run the partial redundancy algorithms). */ - if (optimize_function_for_size_p (cfun)) - { - timevar_push (TV_HOIST); - max_gcse_regno = max_reg_num (); - alloc_gcse_mem (); - changed |= one_code_hoisting_pass (); - free_gcse_mem (); - - if (max_pass_bytes < bytes_used) - max_pass_bytes = bytes_used; - timevar_pop (TV_HOIST); - } + timevar_push (TV_HOIST); + max_gcse_regno = max_reg_num (); + alloc_gcse_mem (); + one_code_hoisting_pass (); + timevar_pop (TV_HOIST); + } - if (dump_file) - { - fprintf (dump_file, "\n"); - fflush (dump_file); - } + free_gcse_mem (); - obstack_free (&gcse_obstack, gcse_obstack_bottom); - pass++; + if (dump_file) + { + fprintf (dump_file, "\n"); + fflush (dump_file); } - /* Do one last pass of copy propagation, including cprop into - conditional jumps. */ + obstack_free (&gcse_obstack, gcse_obstack_bottom); + /* Do the second const/copy propagation pass, including cprop into + conditional jumps. */ if (dbg_cnt (cprop2)) { max_gcse_regno = max_reg_num (); @@ -804,7 +799,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED) /* This time, go ahead and allow cprop to alter jumps. */ timevar_push (TV_CPROP2); - one_cprop_pass (pass + 1, true, true); + one_cprop_pass (2, true, true); timevar_pop (TV_CPROP2); free_gcse_mem (); } @@ -813,16 +808,17 @@ gcse_main (rtx f ATTRIBUTE_UNUSED) { fprintf (dump_file, "GCSE of %s: %d basic blocks, ", current_function_name (), n_basic_blocks); - fprintf (dump_file, "%d pass%s, %d bytes\n\n", - pass, pass > 1 ? "es" : "", max_pass_bytes); + fprintf (dump_file, "pass 1, %d bytes\n\n", bytes_used); } obstack_free (&gcse_obstack, NULL); free_reg_set_mem (); - /* We are finished with alias. */ + /* We are finished with alias. + ??? Actually we recompute alias in store_motion. */ end_alias_analysis (); + /* Run store motion. */ if (optimize_function_for_speed_p (cfun) && flag_gcse_sm) { timevar_push (TV_LSM); @@ -6530,7 +6526,7 @@ bypass_jumps (void) max_gcse_regno = max_reg_num (); alloc_gcse_mem (); - changed = one_cprop_pass (MAX_GCSE_PASSES + 2, true, true); + changed = one_cprop_pass (3, true, true); free_gcse_mem (); if (dump_file) diff --git a/gcc/params.def b/gcc/params.def index 652bbb2..684de5e 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -223,11 +223,7 @@ DEFPARAM(PARAM_MAX_GCSE_MEMORY, "max-gcse-memory", "The maximum amount of memory to be allocated by GCSE", 50 * 1024 * 1024, 0, 0) -/* The number of repetitions of copy/const prop and PRE to run. */ -DEFPARAM(PARAM_MAX_GCSE_PASSES, - "max-gcse-passes", - "The maximum number of passes to make when doing GCSE", - 1, 1, 0) + /* This is the threshold ratio when to perform partial redundancy elimination after reload. We perform partial redundancy elimination when the following holds: diff --git a/gcc/params.h b/gcc/params.h index a607fb0..e9e6834 100644 --- a/gcc/params.h +++ b/gcc/params.h @@ -124,8 +124,6 @@ typedef enum compiler_param PARAM_VALUE (PARAM_MAX_PENDING_LIST_LENGTH) #define MAX_GCSE_MEMORY \ ((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY)) -#define MAX_GCSE_PASSES \ - PARAM_VALUE (PARAM_MAX_GCSE_PASSES) #define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \ PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION) #define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \ |