diff options
author | Mostafa Hagog <mustafa@il.ibm.com> | 2003-10-17 16:16:45 +0000 |
---|---|---|
committer | Roger Sayle <sayle@gcc.gnu.org> | 2003-10-17 16:16:45 +0000 |
commit | f5f2e3cd9bed8f3c943d0558ad8b1a4c41efcc86 (patch) | |
tree | a29aa6dd07abdea8e834684eb42d8d5349352a34 | |
parent | 9579624e7258fc1c5cf8e79a63bbe950deae7c17 (diff) | |
download | gcc-f5f2e3cd9bed8f3c943d0558ad8b1a4c41efcc86.zip gcc-f5f2e3cd9bed8f3c943d0558ad8b1a4c41efcc86.tar.gz gcc-f5f2e3cd9bed8f3c943d0558ad8b1a4c41efcc86.tar.bz2 |
common.opt: Add description of the new -fgcse-las flag.
2003-10-17 Mostafa Hagog <mustafa@il.ibm.com>
* common.opt: Add description of the new -fgcse-las flag.
* flags.h (flag_gcse_las): Declaration of global flag_gcse_las.
* gcse.c (hash_scan_set): Handle the case of store expression and
insert the memory expression to the hash table, this way we make it
possible to discover redundant loads after stores and remove them.
(pre_insert_copy_insn): moved the call to update_ld_motion_stores,
to pre_insert_copies, it is not the correct place to call it after
adding stores to be in the available expression hash table.
(pre_insert_copies): Added the call to update_ld_motion_stores when
one or more copies were inserted.
* opts.c (common_handle_option): Handle the -fgcse-las flag.
* toplev.c (flag_gcse_las): Initialization of flag_gcse_las.
* doc/invoke.texi: Document new -fgcse-las flag.
From-SVN: r72612
-rw-r--r-- | gcc/ChangeLog | 17 | ||||
-rw-r--r-- | gcc/common.opt | 4 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 25 | ||||
-rw-r--r-- | gcc/flags.h | 5 | ||||
-rw-r--r-- | gcc/gcse.c | 100 | ||||
-rw-r--r-- | gcc/opts.c | 4 | ||||
-rw-r--r-- | gcc/toplev.c | 6 |
7 files changed, 139 insertions, 22 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5558cc5..6cb9af4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2003-10-17 Mostafa Hagog <mustafa@il.ibm.com> + + * common.opt: Add description of the new -fgcse-las flag. + * flags.h (flag_gcse_las): Declaration of global flag_gcse_las. + * gcse.c (hash_scan_set): Handle the case of store expression and + insert the memory expression to the hash table, this way we make it + possible to discover redundant loads after stores and remove them. + (pre_insert_copy_insn): moved the call to update_ld_motion_stores, + to pre_insert_copies, it is not the correct place to call it after + adding stores to be in the available expression hash table. + (pre_insert_copies): Added the call to update_ld_motion_stores when + one or more copies were inserted. + * opts.c (common_handle_option): Handle the -fgcse-las flag. + * toplev.c (flag_gcse_las): Initialization of flag_gcse_las. + + * doc/invoke.tex: Document new -fgcse-las flag. + 2003-10-18 Alan Modra <amodra@bigpond.net.au> * config/rs6000/crtsavres.asm: Correct alignment of powerpc64 code diff --git a/gcc/common.opt b/gcc/common.opt index 26af280..fdf28b2 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -362,6 +362,10 @@ fgcse-sm Common Perform store motion after global common subexpression elimination +fgcse-las +Common +Perform redundant load after store elimination in global common subexpression elimination + fgnu-linker Common Output GNU ld formatted global initializers diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 474c9b7..fd20400 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -270,8 +270,8 @@ in the following sections. 
-fdelayed-branch -fdelete-null-pointer-checks @gol -fexpensive-optimizations -ffast-math -ffloat-store @gol -fforce-addr -fforce-mem -ffunction-sections @gol --fgcse -fgcse-lm -fgcse-sm -floop-optimize -fcrossjumping @gol --fif-conversion -fif-conversion2 @gol +-fgcse -fgcse-lm -fgcse-sm -fgcse-las -floop-optimize @gol +-fcrossjumping -fif-conversion -fif-conversion2 @gol -finline-functions -finline-limit=@var{n} -fkeep-inline-functions @gol -fkeep-static-consts -fmerge-constants -fmerge-all-constants @gol -fmove-all-movables -fnew-ra -fno-branch-count-reg @gol @@ -3677,10 +3677,10 @@ also turns on the following optimization flags: -fstrength-reduce @gol -fcse-follow-jumps -fcse-skip-blocks @gol -frerun-cse-after-loop -frerun-loop-opt @gol --fgcse -fgcse-lm -fgcse-sm @gol +-fgcse -fgcse-lm -fgcse-sm -fgcse-las @gol -fdelete-null-pointer-checks @gol -fexpensive-optimizations @gol --fregmove -@gol +-fregmove @gol -fschedule-insns -fschedule-insns2 @gol -fsched-interblock -fsched-spec @gol -fcaller-saves @gol @@ -3996,10 +3996,19 @@ Enabled by default when gcse is enabled. @item -fgcse-sm @opindex fgcse-sm -When @option{-fgcse-sm} is enabled, A store motion pass is run after global common -subexpression elimination. This pass will attempt to move stores out of loops. -When used in conjunction with @option{-fgcse-lm}, loops containing a load/store sequence -can be changed to a load before the loop and a store after the loop. +When @option{-fgcse-sm} is enabled, a store motion pass is run after +global common subexpression elimination. This pass will attempt to move +stores out of loops. When used in conjunction with @option{-fgcse-lm}, +loops containing a load/store sequence can be changed to a load before +the loop and a store after the loop. + +Enabled by default when gcse is enabled. 
+ +@item -fgcse-las +@opindex fgcse-las +When @option{-fgcse-las} is enabled, the global common subexpression +elimination pass eliminates redundant loads that come after stores to the +same memory location (both partial and full redundacies). Enabled by default when gcse is enabled. diff --git a/gcc/flags.h b/gcc/flags.h index 93600fb..2a1a10f 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -675,6 +675,11 @@ extern int flag_gcse_lm; extern int flag_gcse_sm; +/* Nonzero if we want to perform redundant load-after-store elimination + in gcse. */ + +extern int flag_gcse_las; + /* Perform branch target register optimization before prologue / epilogue threading. */ @@ -2205,6 +2205,49 @@ hash_scan_set (rtx pat, rtx insn, struct hash_table *table) && oprs_available_p (pat, tmp)))) insert_set_in_table (pat, insn, table); } + /* In case of store we want to consider the memory value as avaiable in + the REG stored in that memory. This makes it possible to remove + redundant loads from due to stores to the same location. */ + else if (flag_gcse_las && GET_CODE (src) == REG && GET_CODE (dest) == MEM) + { + unsigned int regno = REGNO (src); + + /* Do not do this for constant/copy propagation. */ + if (! table->set_p + /* Only record sets of pseudo-regs in the hash table. */ + && regno >= FIRST_PSEUDO_REGISTER + /* Don't GCSE something if we can't do a reg/reg copy. */ + && can_copy_p (GET_MODE (src)) + /* GCSE commonly inserts instruction after the insn. We can't + do that easily for EH_REGION notes so disable GCSE on these + for now. */ + && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX) + /* Is SET_DEST something we want to gcse? */ + && want_to_gcse_p (dest) + /* Don't CSE a nop. */ + && ! set_noop_p (pat) + /* Don't GCSE if it has attached REG_EQUIV note. 
+ At this point this only function parameters should have + REG_EQUIV notes and if the argument slot is used somewhere + explicitly, it means address of parameter has been taken, + so we should not extend the lifetime of the pseudo. */ + && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0 + || GET_CODE (XEXP (note, 0)) != MEM)) + { + /* Stores are never anticipatable. */ + int antic_p = 0; + /* An expression is not available if its operands are + subsequently modified, including this insn. It's also not + available if this is a branch, because we can't insert + a set after the branch. */ + int avail_p = oprs_available_p (dest, insn) + && ! JUMP_P (insn); + + /* Record the memory expression (DEST) in the hash table. */ + insert_expr_in_table (dest, GET_MODE (dest), insn, + antic_p, avail_p, table); + } + } } static void @@ -5360,7 +5403,13 @@ pre_edge_insert (struct edge_list *edge_list, struct expr **index_map) reaching_reg <- expr old_reg <- reaching_reg because this way copy propagation can discover additional PRE - opportunities. But if this fails, we try the old way. */ + opportunities. But if this fails, we try the old way. + When "expr" is a store, i.e. + given "MEM <- old_reg", instead of adding after it + reaching_reg <- old_reg + it's better to add it before as follows: + reaching_reg <- old_reg + MEM <- reaching_reg. */ static void pre_insert_copy_insn (struct expr *expr, rtx insn) @@ -5395,22 +5444,38 @@ pre_insert_copy_insn (struct expr *expr, rtx insn) else abort (); - old_reg = SET_DEST (set); - - /* Check if we can modify the set destination in the original insn. */ - if (validate_change (insn, &SET_DEST (set), reg, 0)) + if (GET_CODE (SET_DEST (set)) == REG) { - new_insn = gen_move_insn (old_reg, reg); - new_insn = emit_insn_after (new_insn, insn); + old_reg = SET_DEST (set); + /* Check if we can modify the set destination in the original insn. 
*/ + if (validate_change (insn, &SET_DEST (set), reg, 0)) + { + new_insn = gen_move_insn (old_reg, reg); + new_insn = emit_insn_after (new_insn, insn); - /* Keep register set table up to date. */ - replace_one_set (REGNO (old_reg), insn, new_insn); - record_one_set (regno, insn); + /* Keep register set table up to date. */ + replace_one_set (REGNO (old_reg), insn, new_insn); + record_one_set (regno, insn); + } + else + { + new_insn = gen_move_insn (reg, old_reg); + new_insn = emit_insn_after (new_insn, insn); + + /* Keep register set table up to date. */ + record_one_set (regno, new_insn); + } } - else + else /* This is possible only in case of a store to memory. */ { + old_reg = SET_SRC (set); new_insn = gen_move_insn (reg, old_reg); - new_insn = emit_insn_after (new_insn, insn); + + /* Check if we can modify the set source in the original insn. */ + if (validate_change (insn, &SET_SRC (set), reg, 0)) + new_insn = emit_insn_before (new_insn, insn); + else + new_insn = emit_insn_after (new_insn, insn); /* Keep register set table up to date. */ record_one_set (regno, new_insn); @@ -5423,7 +5488,6 @@ pre_insert_copy_insn (struct expr *expr, rtx insn) "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n", BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno); - update_ld_motion_stores (expr); } /* Copy available expressions that reach the redundant expression @@ -5432,7 +5496,7 @@ pre_insert_copy_insn (struct expr *expr, rtx insn) static void pre_insert_copies (void) { - unsigned int i; + unsigned int i, added_copy; struct expr *expr; struct occr *occr; struct occr *avail; @@ -5453,6 +5517,9 @@ pre_insert_copies (void) expression wasn't deleted anywhere. */ if (expr->reaching_reg == NULL) continue; + + /* Set when we add a copy for that expression. 
*/ + added_copy = 0; for (occr = expr->antic_occr; occr != NULL; occr = occr->next) { @@ -5477,11 +5544,16 @@ pre_insert_copies (void) BLOCK_FOR_INSN (occr->insn))) continue; + added_copy = 1; + /* Copy the result of avail to reaching_reg. */ pre_insert_copy_insn (expr, insn); avail->copied_p = 1; } } + + if (added_copy) + update_ld_motion_stores (expr); } } @@ -1019,6 +1019,10 @@ common_handle_option (size_t scode, const char *arg, flag_gcse_sm = value; break; + case OPT_fgcse_las: + flag_gcse_las = value; + break; + case OPT_fgnu_linker: flag_gnu_linker = value; break; diff --git a/gcc/toplev.c b/gcc/toplev.c index e711135..c1a05f6 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -697,6 +697,11 @@ int flag_gcse_lm = 1; int flag_gcse_sm = 1; +/* Nonzero if we want to perfrom redundant load after store elimination + in gcse. */ + +int flag_gcse_las = 1; + /* Perform target register optimization before prologue / epilogue threading. */ @@ -1075,6 +1080,7 @@ static const lang_independent_options f_options[] = {"gcse", &flag_gcse, 1 }, {"gcse-lm", &flag_gcse_lm, 1 }, {"gcse-sm", &flag_gcse_sm, 1 }, + {"gcse-las", &flag_gcse_las, 1 }, {"branch-target-load-optimize", &flag_branch_target_load_optimize, 1 }, {"branch-target-load-optimize2", &flag_branch_target_load_optimize2, 1 }, {"loop-optimize", &flag_loop_optimize, 1 }, |