aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/doc/invoke.texi3
-rw-r--r--gcc/gcse.c178
-rw-r--r--gcc/params.def6
-rw-r--r--gcc/params.h2
5 files changed, 97 insertions, 101 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 04b6507..8b1a41a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2009-04-12 Steven Bosscher <steven@gcc.gnu.org>
+
+ * doc/invoke.texi (max_gcse_passes): Remove documentation.
+ * params.def (PARAM_MAX_GCSE_PASSES): Remove.
+ * params.h (MAX_GCSE_PASSES): Remove.
+ * gcse.c (gcse_main): Run CPROP1, PRE or HOIST, and CPROP2
+ in sequence. Remove ability to run multiple passes.
+ (bypass_jumps): Report run as third CPROP pass.
+
2009-04-12 Adam Nemet <anemet@caviumnetworks.com>
PR middle-end/39651
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a09ce75..c5f8dfb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7330,9 +7330,6 @@ order to perform the global common subexpression elimination
optimization. If more memory than specified is required, the
optimization will not be done.
-@item max-gcse-passes
-The maximum number of passes of GCSE to run. The default is 1.
-
@item max-pending-list-length
The maximum number of pending dependencies scheduling will allow
before flushing the current state and starting over. Large functions
diff --git a/gcc/gcse.c b/gcc/gcse.c
index cbb8916..00f0986 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -190,16 +190,18 @@ along with GCC; see the file COPYING3. If not see
We perform the following steps:
- 1) Compute basic block information.
+ 1) Compute table of places where registers are set.
- 2) Compute table of places where registers are set.
+ 2) Perform copy/constant propagation.
- 3) Perform copy/constant propagation.
-
- 4) Perform global cse using lazy code motion if not optimizing
+ 3) Perform global cse using lazy code motion if not optimizing
for size, or code hoisting if we are.
- 5) Perform another pass of copy/constant propagation.
+ 4) Perform another pass of copy/constant propagation. Try to bypass
+ conditional jumps if the condition can be computed from a value of
+ an incoming edge.
+
+ 5) Perform store motion.
Two passes of copy/constant propagation are done because the first one
enables more GCSE and the second one helps to clean up the copies that
@@ -212,6 +214,11 @@ along with GCC; see the file COPYING3. If not see
(set (pseudo-reg) (expression)).
Function want_to_gcse_p says what these are.
+ In addition, expressions in REG_EQUAL notes are candidates for GXSE-ing.
+ This allows PRE to hoist expressions that are expressed in multiple insns,
+ such as comprex address calculations (e.g. for PIC code, or loads with a
+ high part and as lowe part).
+
PRE handles moving invariant expressions out of loops (by treating them as
partially redundant).
@@ -235,9 +242,13 @@ along with GCC; see the file COPYING3. If not see
It was found doing copy propagation between each pass enables further
substitutions.
+ This study was done before expressions in REG_EQUAL notes were added as
+ candidate expressions for optimization, and before the GIMPLE optimizers
+ were added. Probably, multiple passes is even less efficient now than
+ at the time when the study was conducted.
+
PRE is quite expensive in complicated functions because the DFA can take
- a while to converge. Hence we only perform one pass. The parameter
- max-gcse-passes can be modified if one wants to experiment.
+ a while to converge. Hence we only perform one pass.
**********************
@@ -661,11 +672,7 @@ static bool is_too_expensive (const char *);
static int
gcse_main (rtx f ATTRIBUTE_UNUSED)
{
- int changed, pass;
- /* Bytes used at start of pass. */
- int initial_bytes_used;
- /* Maximum number of bytes used by a pass. */
- int max_pass_bytes;
+ int changed;
/* Point to release obstack data from for each pass. */
char *gcse_obstack_bottom;
@@ -697,6 +704,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
/* We need alias. */
init_alias_analysis ();
+
/* Record where pseudo-registers are set. This data is kept accurate
during each pass. ??? We could also record hard-reg information here
[since it's unchanging], however it is currently done during hash table
@@ -704,99 +712,86 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
It may be tempting to compute MEM set information here too, but MEM sets
will be subject to code motion one day and thus we need to compute
- information about memory sets when we build the hash tables. */
+ information about memory sets when we build the hash tables.
+
+ ??? Actually, we already know the information that compute_sets computes
+ because it is available from DF. FIXME. */
alloc_reg_set_mem (max_gcse_regno);
compute_sets ();
- pass = 0;
- initial_bytes_used = bytes_used;
- max_pass_bytes = 0;
gcse_obstack_bottom = GOBNEWVAR (char, 1);
- changed = 1;
- while (changed && pass < MAX_GCSE_PASSES)
- {
- changed = 0;
- if (dump_file)
- fprintf (dump_file, "GCSE pass %d\n\n", pass + 1);
-
- /* Initialize bytes_used to the space for the pred/succ lists,
- and the reg_set_table data. */
- bytes_used = initial_bytes_used;
+ changed = 0;
+
+ if (dump_file)
+ fprintf (dump_file, "GCSE pass\n\n");
- /* Each pass may create new registers, so recalculate each time. */
- max_gcse_regno = max_reg_num ();
+ max_gcse_regno = max_reg_num ();
- alloc_gcse_mem ();
+ alloc_gcse_mem ();
- /* Don't allow constant propagation to modify jumps
- during this pass. */
- if (dbg_cnt (cprop1))
- {
- timevar_push (TV_CPROP1);
- changed = one_cprop_pass (pass + 1, false, false);
- timevar_pop (TV_CPROP1);
- }
+ /* Don't allow constant propagation to modify jumps
+ during this pass. */
+ if (dbg_cnt (cprop1))
+ {
+ timevar_push (TV_CPROP1);
+ changed = one_cprop_pass (1, false, false);
+ timevar_pop (TV_CPROP1);
+ }
- if (optimize_function_for_speed_p (cfun))
+ if (optimize_function_for_speed_p (cfun))
+ {
+ timevar_push (TV_PRE);
+ changed |= one_pre_gcse_pass (1);
+ /* We may have just created new basic blocks. Release and
+ recompute various things which are sized on the number of
+ basic blocks.
+ ??? There would be no need for this if we used a block
+ based Lazy Code Motion variant, with all (or selected)
+ edges split before running the pass. That would also
+ help find_implicit_sets for cprop. FIXME. */
+ if (changed)
{
- timevar_push (TV_PRE);
- changed |= one_pre_gcse_pass (pass + 1);
- /* We may have just created new basic blocks. Release and
- recompute various things which are sized on the number of
- basic blocks. */
- if (changed)
- {
- free_modify_mem_tables ();
- modify_mem_list = GCNEWVEC (rtx, last_basic_block);
- canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block);
- }
- free_reg_set_mem ();
- alloc_reg_set_mem (max_reg_num ());
- compute_sets ();
- run_jump_opt_after_gcse = 1;
- timevar_pop (TV_PRE);
+ free_modify_mem_tables ();
+ modify_mem_list = GCNEWVEC (rtx, last_basic_block);
+ canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block);
}
- if (max_pass_bytes < bytes_used)
- max_pass_bytes = bytes_used;
-
- /* Free up memory, then reallocate for code hoisting. We can
- not re-use the existing allocated memory because the tables
- will not have info for the insns or registers created by
- partial redundancy elimination. */
- free_gcse_mem ();
-
- /* It does not make sense to run code hoisting unless we are optimizing
+ /* ??? When we allocate this at the start of the function,
+ the comment says that "this data is kept accurate during
+ each pass". Apparently this is not so? FIXME. */
+ free_reg_set_mem ();
+ alloc_reg_set_mem (max_reg_num ());
+ compute_sets ();
+ run_jump_opt_after_gcse = 1;
+ timevar_pop (TV_PRE);
+ }
+ else
+ {
+ /* This function is being optimized for code size.
+ It does not make sense to run code hoisting unless we are optimizing
for code size -- it rarely makes programs faster, and can make
them bigger if we did partial redundancy elimination (when optimizing
for space, we don't run the partial redundancy algorithms). */
- if (optimize_function_for_size_p (cfun))
- {
- timevar_push (TV_HOIST);
- max_gcse_regno = max_reg_num ();
- alloc_gcse_mem ();
- changed |= one_code_hoisting_pass ();
- free_gcse_mem ();
-
- if (max_pass_bytes < bytes_used)
- max_pass_bytes = bytes_used;
- timevar_pop (TV_HOIST);
- }
+ timevar_push (TV_HOIST);
+ max_gcse_regno = max_reg_num ();
+ alloc_gcse_mem ();
+ one_code_hoisting_pass ();
+ timevar_pop (TV_HOIST);
+ }
- if (dump_file)
- {
- fprintf (dump_file, "\n");
- fflush (dump_file);
- }
+ free_gcse_mem ();
- obstack_free (&gcse_obstack, gcse_obstack_bottom);
- pass++;
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n");
+ fflush (dump_file);
}
- /* Do one last pass of copy propagation, including cprop into
- conditional jumps. */
+ obstack_free (&gcse_obstack, gcse_obstack_bottom);
+ /* Do the second const/copy propagation pass, including cprop into
+ conditional jumps. */
if (dbg_cnt (cprop2))
{
max_gcse_regno = max_reg_num ();
@@ -804,7 +799,7 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
/* This time, go ahead and allow cprop to alter jumps. */
timevar_push (TV_CPROP2);
- one_cprop_pass (pass + 1, true, true);
+ one_cprop_pass (2, true, true);
timevar_pop (TV_CPROP2);
free_gcse_mem ();
}
@@ -813,16 +808,17 @@ gcse_main (rtx f ATTRIBUTE_UNUSED)
{
fprintf (dump_file, "GCSE of %s: %d basic blocks, ",
current_function_name (), n_basic_blocks);
- fprintf (dump_file, "%d pass%s, %d bytes\n\n",
- pass, pass > 1 ? "es" : "", max_pass_bytes);
+ fprintf (dump_file, "pass 1, %d bytes\n\n", bytes_used);
}
obstack_free (&gcse_obstack, NULL);
free_reg_set_mem ();
- /* We are finished with alias. */
+ /* We are finished with alias.
+ ??? Actually we recompute alias in store_motion. */
end_alias_analysis ();
+ /* Run store motion. */
if (optimize_function_for_speed_p (cfun) && flag_gcse_sm)
{
timevar_push (TV_LSM);
@@ -6530,7 +6526,7 @@ bypass_jumps (void)
max_gcse_regno = max_reg_num ();
alloc_gcse_mem ();
- changed = one_cprop_pass (MAX_GCSE_PASSES + 2, true, true);
+ changed = one_cprop_pass (3, true, true);
free_gcse_mem ();
if (dump_file)
diff --git a/gcc/params.def b/gcc/params.def
index 652bbb2..684de5e 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -223,11 +223,7 @@ DEFPARAM(PARAM_MAX_GCSE_MEMORY,
"max-gcse-memory",
"The maximum amount of memory to be allocated by GCSE",
50 * 1024 * 1024, 0, 0)
-/* The number of repetitions of copy/const prop and PRE to run. */
-DEFPARAM(PARAM_MAX_GCSE_PASSES,
- "max-gcse-passes",
- "The maximum number of passes to make when doing GCSE",
- 1, 1, 0)
+
/* This is the threshold ratio when to perform partial redundancy
elimination after reload. We perform partial redundancy elimination
when the following holds:
diff --git a/gcc/params.h b/gcc/params.h
index a607fb0..e9e6834 100644
--- a/gcc/params.h
+++ b/gcc/params.h
@@ -124,8 +124,6 @@ typedef enum compiler_param
PARAM_VALUE (PARAM_MAX_PENDING_LIST_LENGTH)
#define MAX_GCSE_MEMORY \
((size_t) PARAM_VALUE (PARAM_MAX_GCSE_MEMORY))
-#define MAX_GCSE_PASSES \
- PARAM_VALUE (PARAM_MAX_GCSE_PASSES)
#define GCSE_AFTER_RELOAD_PARTIAL_FRACTION \
PARAM_VALUE (PARAM_GCSE_AFTER_RELOAD_PARTIAL_FRACTION)
#define GCSE_AFTER_RELOAD_CRITICAL_FRACTION \