From 0516f6fe82641daf7c1ac8812998049ac591201e Mon Sep 17 00:00:00 2001 From: Steven Bosscher Date: Wed, 18 Aug 2004 20:53:59 +0000 Subject: Makefile.in (OBJS-common): Add postreload-gcse.c. * Makefile.in (OBJS-common): Add postreload-gcse.c. Add new postreload-gcse.o. * cse.c (SAFE_HASH): Define as wrapper around safe_hash. (lookup_as_function, insert, rehash_using_reg, use_related_value, equiv_constant): Use SAFE_HASH instead of safe_hash. (exp_equiv_p): Export. Add for_gcse argument when comparing for GCSE. (lookup, lookup_for_remove, merge_equiv_classes, find_best_addr, find_comparison_args, fold_rtx, cse_insn): Update callers. (hash_rtx): New function derived from old canon_hash and bits from gcse.c hash_expr_1. (canon_hash_string): Rename to hash_rtx_string. (canon_hash, safe_hash): Make static inline. Call hash_rtx. * cselib.c (hash_rtx): Rename to cselib_hash_rtx. (cselib_lookup): Update this caller. * gcse.c (modify_mem_list_set, canon_modify_mem_list_set): Make static. (hash_expr): Call hash_rtx. (ldst_entry): Likewise. (expr_equiv_p): Call exp_equiv_p. (struct unoccr, hash_expr_1, hash_string_1, lookup_expr, reg_used_on_edge, reg_set_between_after_reload_p, reg_used_between_after_reload_p, get_avail_load_store_reg, is_jump_table_basic_block, bb_has_well_behaved_predecessors, get_bb_avail_insn, hash_scan_set_after_reload, compute_hash_table_after_reload, eliminate_partially_redundant_loads, gcse_after_reload, get_bb_avail_insn, gcse_after_reload_main): Remove. * postreload-gcse.c: New file, reincarnating most of the above. * rtl.h (exp_equiv_p, hash_rtx): New prototypes. (gcse_after_reload_main): Update prototype. * timevar.def (TV_GCSE_AFTER_RELOAD): New timevar. * passes.c (rest_of_handle_gcse2): Use it. From-SVN: r86206 --- gcc/ChangeLog | 36 ++ gcc/Makefile.in | 15 +- gcc/cse.c | 404 ++++++++------- gcc/cselib.c | 16 +- gcc/gcse.c | 1028 +----------------------------------- gcc/passes.c | 6 +- gcc/postreload-gcse.c | 1379 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/rtl.h | 6 +- gcc/timevar.def | 1 + 9 files changed, 1668 insertions(+), 1223 deletions(-) create mode 100644 gcc/postreload-gcse.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d67d216..58420cf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,39 @@ +2004-08-18 Steven Bosscher + + * Makefile.in (OBJS-common): Add postreload-gcse.c. + Add new postreload-gcse.o. + * cse.c (SAFE_HASH): Define as wrapper around safe_hash. + (lookup_as_function, insert, rehash_using_reg, use_related_value, + equiv_constant): Use SAFE_HASH instead of safe_hash. + (exp_equiv_p): Export. Add for_gcse argument when comparing + for GCSE. + (lookup, lookup_for_remove, merge_equiv_classes, find_best_addr, + find_comparison_args, fold_rtx, cse_insn): Update callers. + (hash_rtx): New function derived from old canon_hash and bits + from gcse.c hash_expr_1. + (canon_hash_string): Rename to hash_rtx_string. + (canon_hash, safe_hash): Make static inline. Call hash_rtx. + * cselib.c (hash_rtx): Rename to cselib_hash_rtx. + (cselib_lookup): Update this caller. + * gcse.c (modify_mem_list_set, canon_modify_mem_list_set): + Make static. + (hash_expr): Call hash_rtx. + (ldst_entry): Likewise. + (expr_equiv_p): Call exp_equiv_p. 
+ (struct unoccr, hash_expr_1, hash_string_1, lookup_expr, + reg_used_on_edge, reg_set_between_after_reload_p, + reg_used_between_after_reload_p, get_avail_load_store_reg, + is_jump_table_basic_block, bb_has_well_behaved_predecessors, + get_bb_avail_insn, hash_scan_set_after_reload, + compute_hash_table_after_reload, + eliminate_partially_redundant_loads, gcse_after_reload, + get_bb_avail_insn, gcse_after_reload_main): Remove. + * postreload-gcse.c: New file, reincarnating most of the above. + * rtl.h (exp_equiv_p, hash_rtx): New prototypes. + (gcse_after_reload_main): Update prototype. + * timevar.def (TV_GCSE_AFTER_RELOAD): New timevar. + * passes.c (rest_of_handle_gcse2): Use it. + 2004-08-18 Diego Novillo * tree-ssa-loop.c (pass_loop_init): Add TODO_dump_func. diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 5bafc59..2a18bf2 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -899,15 +899,18 @@ OBJS-common = \ genrtl.o ggc-common.o global.o graph.o gtype-desc.o \ haifa-sched.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o insn-modes.o \ insn-extract.o insn-opinit.o insn-output.o insn-peep.o insn-recog.o \ - insn-preds.o integrate.o intl.o jump.o langhooks.o lcm.o lists.o \ - local-alloc.o loop.o modulo-sched.o \ - optabs.o options.o opts.o params.o postreload.o predict.o \ + integrate.o intl.o jump.o langhooks.o lcm.o lists.o local-alloc.o \ + loop.o modulo-sched.o optabs.o options.o opts.o \ + params.o postreload.o postreload-gcse.o predict.o \ + insn-preds.o integrate.o intl.o jump.o langhooks.o lcm.o lists.o \ + local-alloc.o loop.o modulo-sched.o optabs.o options.o opts.o \ + params.o postreload.o postreload-gcse.o predict.o \ print-rtl.o print-tree.o value-prof.o var-tracking.o \ profile.o ra.o ra-build.o ra-colorize.o ra-debug.o ra-rewrite.o \ real.o recog.o reg-stack.o regclass.o regmove.o regrename.o \ reload.o reload1.o reorg.o resource.o rtl.o rtlanal.o rtl-error.o \ sbitmap.o sched-deps.o sched-ebb.o sched-rgn.o sched-vis.o sdbout.o \ - simplify-rtx.o sreal.o stmt.o stor-layout.o stringpool.o \ + simplify-rtx.o sreal.o stmt.o stor-layout.o stringpool.o \ targhooks.o timevar.o toplev.o tracer.o tree.o tree-dump.o unroll.o \ varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \ et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \ @@ -2047,6 +2050,10 @@ postreload.o : postreload.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) reload.h $(REGS_H) hard-reg-set.h insn-config.h \ $(BASIC_BLOCK_H) $(RECOG_H) output.h function.h toplev.h cselib.h $(TM_P_H) \ except.h $(TREE_H) +postreload-gcse.o : postreload-gcse.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) real.h insn-config.h $(GGC_H) \ + $(RECOG_H) $(EXPR_H) $(BASIC_BLOCK_H) function.h output.h toplev.h $(TM_P_H) \ + except.h $(TREE_H) caller-save.o : caller-save.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(FLAGS_H) $(REGS_H) hard-reg-set.h insn-config.h $(BASIC_BLOCK_H) function.h \ $(RECOG_H) reload.h $(EXPR_H) toplev.h $(TM_P_H) diff --git a/gcc/cse.c b/gcc/cse.c index 93acbe5..47708e2 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -489,6 +489,12 @@ struct table_elt ? (((unsigned) REG << 7) + (unsigned) REG_QTY (REGNO (X))) \ : canon_hash (X, M)) & HASH_MASK) +/* Like HASH, but without side-effects. */ +#define SAFE_HASH(X, M) \ + ((REG_P (X) && REGNO (X) >= FIRST_PSEUDO_REGISTER \ + ? 
(((unsigned) REG << 7) + (unsigned) REG_QTY (REGNO (X))) \ + : safe_hash (X, M)) & HASH_MASK) + /* Determine whether register number N is considered a fixed register for the purpose of approximating register costs. It is desirable to replace other regs with fixed regs, to reduce need for @@ -625,10 +631,11 @@ static void rehash_using_reg (rtx); static void invalidate_memory (void); static void invalidate_for_call (void); static rtx use_related_value (rtx, struct table_elt *); -static unsigned canon_hash (rtx, enum machine_mode); -static unsigned canon_hash_string (const char *); -static unsigned safe_hash (rtx, enum machine_mode); -static int exp_equiv_p (rtx, rtx, int, int); + +static inline unsigned canon_hash (rtx, enum machine_mode); +static inline unsigned safe_hash (rtx, enum machine_mode); +static unsigned hash_rtx_string (const char *); + static rtx canon_reg (rtx, rtx); static void find_best_addr (rtx, rtx *, enum machine_mode); static enum rtx_code find_comparison_args (enum rtx_code, rtx *, rtx *, @@ -1324,7 +1331,7 @@ lookup (rtx x, unsigned int hash, enum machine_mode mode) for (p = table[hash]; p; p = p->next_same_hash) if (mode == p->mode && ((x == p->exp && REG_P (x)) - || exp_equiv_p (x, p->exp, !REG_P (x), 0))) + || exp_equiv_p (x, p->exp, !REG_P (x), false))) return p; return 0; @@ -1352,7 +1359,8 @@ lookup_for_remove (rtx x, unsigned int hash, enum machine_mode mode) else { for (p = table[hash]; p; p = p->next_same_hash) - if (mode == p->mode && (x == p->exp || exp_equiv_p (x, p->exp, 0, 0))) + if (mode == p->mode + && (x == p->exp || exp_equiv_p (x, p->exp, 0, false))) return p; } @@ -1366,7 +1374,7 @@ static rtx lookup_as_function (rtx x, enum rtx_code code) { struct table_elt *p - = lookup (x, safe_hash (x, VOIDmode) & HASH_MASK, GET_MODE (x)); + = lookup (x, SAFE_HASH (x, VOIDmode), GET_MODE (x)); /* If we are looking for a CONST_INT, the mode doesn't really matter, as long as we are narrowing. So if we looked in vain for a mode narrower @@ -1376,7 +1384,7 @@ lookup_as_function (rtx x, enum rtx_code code) { x = copy_rtx (x); PUT_MODE (x, word_mode); - p = lookup (x, safe_hash (x, VOIDmode) & HASH_MASK, word_mode); + p = lookup (x, SAFE_HASH (x, VOIDmode), word_mode); } if (p == 0) @@ -1385,7 +1393,7 @@ lookup_as_function (rtx x, enum rtx_code code) for (p = p->first_same_value; p; p = p->next_same_value) if (GET_CODE (p->exp) == code /* Make sure this is a valid entry in the table. */ - && exp_equiv_p (p->exp, p->exp, 1, 0)) + && exp_equiv_p (p->exp, p->exp, 1, false)) return p->exp; return 0; @@ -1568,7 +1576,7 @@ insert (rtx x, struct table_elt *classp, unsigned int hash, enum machine_mode mo if (subexp != 0) { /* Get the integer-free subexpression in the hash table. */ - subhash = safe_hash (subexp, mode) & HASH_MASK; + subhash = SAFE_HASH (subexp, mode); subelt = lookup (subexp, subhash, mode); if (subelt == 0) subelt = insert (subexp, NULL, subhash, mode); @@ -1622,7 +1630,7 @@ merge_equiv_classes (struct table_elt *class1, struct table_elt *class2) /* Remove old entry, make a new one in CLASS1's class. Don't do this for invalid entries as we cannot find their hash code (it also isn't necessary). 
*/ - if (REG_P (exp) || exp_equiv_p (exp, exp, 1, 0)) + if (REG_P (exp) || exp_equiv_p (exp, exp, 1, false)) { bool need_rehash = false; @@ -1917,8 +1925,8 @@ rehash_using_reg (rtx x) { next = p->next_same_hash; if (reg_mentioned_p (x, p->exp) - && exp_equiv_p (p->exp, p->exp, 1, 0) - && i != (hash = safe_hash (p->exp, p->mode) & HASH_MASK)) + && exp_equiv_p (p->exp, p->exp, 1, false) + && i != (hash = SAFE_HASH (p->exp, p->mode))) { if (p->next_same_hash) p->next_same_hash->prev_same_hash = p->prev_same_hash; @@ -2017,7 +2025,7 @@ use_related_value (rtx x, struct table_elt *elt) rtx subexp = get_related_value (x); if (subexp != 0) relt = lookup (subexp, - safe_hash (subexp, GET_MODE (subexp)) & HASH_MASK, + SAFE_HASH (subexp, GET_MODE (subexp)), GET_MODE (subexp)); } @@ -2068,7 +2076,7 @@ use_related_value (rtx x, struct table_elt *elt) /* Hash a string. Just add its bytes up. */ static inline unsigned -canon_hash_string (const char *ps) +hash_rtx_string (const char *ps) { unsigned hash = 0; const unsigned char *p = (const unsigned char *) ps; @@ -2085,23 +2093,26 @@ canon_hash_string (const char *ps) MODE is used in hashing for CONST_INTs only; otherwise the mode of X is used. - Store 1 in do_not_record if any subexpression is volatile. + Store 1 in DO_NOT_RECORD_P if any subexpression is volatile. - Store 1 in hash_arg_in_memory if X contains a MEM rtx - which does not have the MEM_READONLY_P bit set. + If HASH_ARG_IN_MEMORY_P is not NULL, store 1 in it if X contains + a MEM rtx which does not have the RTX_UNCHANGING_P bit set. Note that cse_insn knows that the hash code of a MEM expression is just (int) MEM plus the hash code of the address. */ -static unsigned -canon_hash (rtx x, enum machine_mode mode) +unsigned +hash_rtx (rtx x, enum machine_mode mode, int *do_not_record_p, + int *hash_arg_in_memory_p, bool have_reg_qty) { int i, j; unsigned hash = 0; enum rtx_code code; const char *fmt; - /* repeat is used to turn tail-recursion into iteration. */ + /* Used to turn recursion into iteration. We can't rely on GCC's + tail-recursion elimination since we need to keep accumulating values + in HASH. */ repeat: if (x == 0) return hash; @@ -2112,48 +2123,52 @@ canon_hash (rtx x, enum machine_mode mode) case REG: { unsigned int regno = REGNO (x); - bool record; - /* On some machines, we can't record any non-fixed hard register, - because extending its life will cause reload problems. We - consider ap, fp, sp, gp to be fixed for this purpose. - - We also consider CCmode registers to be fixed for this purpose; - failure to do so leads to failure to simplify 0<100 type of - conditionals. - - On all machines, we can't record any global registers. - Nor should we record any register that is in a small - class, as defined by CLASS_LIKELY_SPILLED_P. */ - - if (regno >= FIRST_PSEUDO_REGISTER) - record = true; - else if (x == frame_pointer_rtx - || x == hard_frame_pointer_rtx - || x == arg_pointer_rtx - || x == stack_pointer_rtx - || x == pic_offset_table_rtx) - record = true; - else if (global_regs[regno]) - record = false; - else if (fixed_regs[regno]) - record = true; - else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) - record = true; - else if (SMALL_REGISTER_CLASSES) - record = false; - else if (CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (regno))) - record = false; - else - record = true; - - if (!record) + if (!reload_completed) { - do_not_record = 1; - return 0; + /* On some machines, we can't record any non-fixed hard register, + because extending its life will cause reload problems. 
We + consider ap, fp, sp, gp to be fixed for this purpose. + + We also consider CCmode registers to be fixed for this purpose; + failure to do so leads to failure to simplify 0<100 type of + conditionals. + + On all machines, we can't record any global registers. + Nor should we record any register that is in a small + class, as defined by CLASS_LIKELY_SPILLED_P. */ + bool record; + + if (regno >= FIRST_PSEUDO_REGISTER) + record = true; + else if (x == frame_pointer_rtx + || x == hard_frame_pointer_rtx + || x == arg_pointer_rtx + || x == stack_pointer_rtx + || x == pic_offset_table_rtx) + record = true; + else if (global_regs[regno]) + record = false; + else if (fixed_regs[regno]) + record = true; + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) + record = true; + else if (SMALL_REGISTER_CLASSES) + record = false; + else if (CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (regno))) + record = false; + else + record = true; + + if (!record) + { + *do_not_record_p = 1; + return 0; + } } - hash += ((unsigned) REG << 7) + (unsigned) REG_QTY (regno); + hash += ((unsigned int) REG << 7); + hash += (have_reg_qty ? (unsigned) REG_QTY (regno) : regno); return hash; } @@ -2164,7 +2179,7 @@ canon_hash (rtx x, enum machine_mode mode) { if (REG_P (SUBREG_REG (x))) { - hash += (((unsigned) SUBREG << 7) + hash += (((unsigned int) SUBREG << 7) + REGNO (SUBREG_REG (x)) + (SUBREG_BYTE (x) / UNITS_PER_WORD)); return hash; @@ -2173,21 +2188,19 @@ canon_hash (rtx x, enum machine_mode mode) } case CONST_INT: - { - unsigned HOST_WIDE_INT tem = INTVAL (x); - hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem; - return hash; - } + hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode + + (unsigned int) INTVAL (x)); + return hash; case CONST_DOUBLE: /* This is like the general case, except that it only counts the integers representing the constant. */ - hash += (unsigned) code + (unsigned) GET_MODE (x); + hash += (unsigned int) code + (unsigned int) GET_MODE (x); if (GET_MODE (x) != VOIDmode) hash += real_hash (CONST_DOUBLE_REAL_VALUE (x)); else - hash += ((unsigned) CONST_DOUBLE_LOW (x) - + (unsigned) CONST_DOUBLE_HIGH (x)); + hash += ((unsigned int) CONST_DOUBLE_LOW (x) + + (unsigned int) CONST_DOUBLE_HIGH (x)); return hash; case CONST_VECTOR: @@ -2200,7 +2213,8 @@ canon_hash (rtx x, enum machine_mode mode) for (i = 0; i < units; ++i) { elt = CONST_VECTOR_ELT (x, i); - hash += canon_hash (elt, GET_MODE (elt)); + hash += hash_rtx (elt, GET_MODE (elt), do_not_record_p, + hash_arg_in_memory_p, have_reg_qty); } return hash; @@ -2208,23 +2222,39 @@ canon_hash (rtx x, enum machine_mode mode) /* Assume there is only one rtx object for any given label. */ case LABEL_REF: - hash += ((unsigned) LABEL_REF << 7) + (unsigned long) XEXP (x, 0); + /* We don't hash on the address of the CODE_LABEL to avoid bootstrap + differences and differences between each stage's debugging dumps. */ + hash += (((unsigned int) LABEL_REF << 7) + + CODE_LABEL_NUMBER (XEXP (x, 0))); return hash; case SYMBOL_REF: - hash += ((unsigned) SYMBOL_REF << 7) + (unsigned long) XSTR (x, 0); - return hash; + { + /* Don't hash on the symbol's address to avoid bootstrap differences. + Different hash values may cause expressions to be recorded in + different orders and thus different registers to be used in the + final assembler. This also avoids differences in the dump files + between various stages. */ + unsigned int h = 0; + const unsigned char *p = (const unsigned char *) XSTR (x, 0); + + while (*p) + h += (h << 7) + *p++; /* ??? 
revisit */ + + hash += ((unsigned int) SYMBOL_REF << 7) + h; + return hash; + } case MEM: /* We don't record if marked volatile or if BLKmode since we don't know the size of the move. */ if (MEM_VOLATILE_P (x) || GET_MODE (x) == BLKmode) { - do_not_record = 1; + *do_not_record_p = 1; return 0; } - if (!MEM_READONLY_P (x)) - hash_arg_in_memory = 1; + if (hash_arg_in_memory_p && !MEM_READONLY_P (x)) + *hash_arg_in_memory_p = 1; /* Now that we have already found this special case, might as well speed it up as much as possible. */ @@ -2236,15 +2266,16 @@ canon_hash (rtx x, enum machine_mode mode) /* A USE that mentions non-volatile memory needs special handling since the MEM may be BLKmode which normally prevents an entry from being made. Pure calls are - marked by a USE which mentions BLKmode memory. */ + marked by a USE which mentions BLKmode memory. + See calls.c:emit_call_1. */ if (MEM_P (XEXP (x, 0)) && ! MEM_VOLATILE_P (XEXP (x, 0))) { hash += (unsigned) USE; x = XEXP (x, 0); - if (!MEM_READONLY_P (x)) - hash_arg_in_memory = 1; + if (hash_arg_in_memory_p && !MEM_READONLY_P (x)) + *hash_arg_in_memory_p = 1; /* Now that we have already found this special case, might as well speed it up as much as possible. */ @@ -2264,34 +2295,36 @@ canon_hash (rtx x, enum machine_mode mode) case CC0: case CALL: case UNSPEC_VOLATILE: - do_not_record = 1; + *do_not_record_p = 1; return 0; case ASM_OPERANDS: if (MEM_VOLATILE_P (x)) { - do_not_record = 1; + *do_not_record_p = 1; return 0; } else { /* We don't want to take the filename and line into account. */ hash += (unsigned) code + (unsigned) GET_MODE (x) - + canon_hash_string (ASM_OPERANDS_TEMPLATE (x)) - + canon_hash_string (ASM_OPERANDS_OUTPUT_CONSTRAINT (x)) + + hash_rtx_string (ASM_OPERANDS_TEMPLATE (x)) + + hash_rtx_string (ASM_OPERANDS_OUTPUT_CONSTRAINT (x)) + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x); if (ASM_OPERANDS_INPUT_LENGTH (x)) { for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++) { - hash += (canon_hash (ASM_OPERANDS_INPUT (x, i), - GET_MODE (ASM_OPERANDS_INPUT (x, i))) - + canon_hash_string (ASM_OPERANDS_INPUT_CONSTRAINT - (x, i))); + hash += (hash_rtx (ASM_OPERANDS_INPUT (x, i), + GET_MODE (ASM_OPERANDS_INPUT (x, i)), + do_not_record_p, hash_arg_in_memory_p, + have_reg_qty) + + hash_rtx_string + (ASM_OPERANDS_INPUT_CONSTRAINT (x, i))); } - hash += canon_hash_string (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); + hash += hash_rtx_string (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); x = ASM_OPERANDS_INPUT (x, 0); mode = GET_MODE (x); goto repeat; @@ -2312,48 +2345,59 @@ canon_hash (rtx x, enum machine_mode mode) { if (fmt[i] == 'e') { - rtx tem = XEXP (x, i); - /* If we are about to do the last recursive call needed at this level, change it into iteration. This function is called enough to be worth it. */ if (i == 0) { - x = tem; + x = XEXP (x, i); goto repeat; } - hash += canon_hash (tem, 0); + + hash += hash_rtx (XEXP (x, i), 0, do_not_record_p, + hash_arg_in_memory_p, have_reg_qty); } + else if (fmt[i] == 'E') for (j = 0; j < XVECLEN (x, i); j++) - hash += canon_hash (XVECEXP (x, i, j), 0); + { + hash += hash_rtx (XVECEXP (x, i, j), 0, do_not_record_p, + hash_arg_in_memory_p, have_reg_qty); + } + else if (fmt[i] == 's') - hash += canon_hash_string (XSTR (x, i)); + hash += hash_rtx_string (XSTR (x, i)); else if (fmt[i] == 'i') - { - unsigned tem = XINT (x, i); - hash += tem; - } + hash += (unsigned int) XINT (x, i); else if (fmt[i] == '0' || fmt[i] == 't') /* Unused. */ ; else abort (); } + return hash; } -/* Like canon_hash but with no side effects. 
*/ +/* Hash an rtx X for cse via hash_rtx. + Stores 1 in do_not_record if any subexpression is volatile. + Stores 1 in hash_arg_in_memory if X contains a mem rtx which + does not have the RTX_UNCHANGING_P bit set. */ + +static inline unsigned +canon_hash (rtx x, enum machine_mode mode) +{ + return hash_rtx (x, mode, &do_not_record, &hash_arg_in_memory, true); +} + +/* Like canon_hash but with no side effects, i.e. do_not_record + and hash_arg_in_memory are not changed. */ -static unsigned +static inline unsigned safe_hash (rtx x, enum machine_mode mode) { - int save_do_not_record = do_not_record; - int save_hash_arg_in_memory = hash_arg_in_memory; - unsigned hash = canon_hash (x, mode); - hash_arg_in_memory = save_hash_arg_in_memory; - do_not_record = save_do_not_record; - return hash; + int dummy_do_not_record; + return hash_rtx (x, mode, &dummy_do_not_record, NULL, true); } /* Return 1 iff X and Y would canonicalize into the same thing, @@ -2363,16 +2407,10 @@ safe_hash (rtx x, enum machine_mode mode) and Y was found in the hash table. We check register refs in Y for being marked as valid. - If EQUAL_VALUES is nonzero, we allow a register to match a constant value - that is known to be in the register. Ordinarily, we don't allow them - to match, because letting them match would cause unpredictable results - in all the places that search a hash table chain for an equivalent - for a given value. A possible equivalent that has different structure - has its hash code computed from different data. Whether the hash code - is the same as that of the given value is pure luck. */ + If FOR_GCSE is true, we compare X and Y for equivalence for GCSE. */ -static int -exp_equiv_p (rtx x, rtx y, int validate, int equal_values) +int +exp_equiv_p (rtx x, rtx y, int validate, bool for_gcse) { int i, j; enum rtx_code code; @@ -2382,42 +2420,13 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) if VALIDATE is nonzero. */ if (x == y && !validate) return 1; + if (x == 0 || y == 0) return x == y; code = GET_CODE (x); if (code != GET_CODE (y)) - { - if (!equal_values) - return 0; - - /* If X is a constant and Y is a register or vice versa, they may be - equivalent. We only have to validate if Y is a register. */ - if (CONSTANT_P (x) && REG_P (y) - && REGNO_QTY_VALID_P (REGNO (y))) - { - int y_q = REG_QTY (REGNO (y)); - struct qty_table_elem *y_ent = &qty_table[y_q]; - - if (GET_MODE (y) == y_ent->mode - && rtx_equal_p (x, y_ent->const_rtx) - && (! validate || REG_IN_TABLE (REGNO (y)) == REG_TICK (REGNO (y)))) - return 1; - } - - if (CONSTANT_P (y) && code == REG - && REGNO_QTY_VALID_P (REGNO (x))) - { - int x_q = REG_QTY (REGNO (x)); - struct qty_table_elem *x_ent = &qty_table[x_q]; - - if (GET_MODE (x) == x_ent->mode - && rtx_equal_p (y, x_ent->const_rtx)) - return 1; - } - - return 0; - } + return 0; /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */ if (GET_MODE (x) != GET_MODE (y)) @@ -2437,29 +2446,48 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) return XSTR (x, 0) == XSTR (y, 0); case REG: - { - unsigned int regno = REGNO (y); - unsigned int endregno - = regno + (regno >= FIRST_PSEUDO_REGISTER ? 1 - : hard_regno_nregs[regno][GET_MODE (y)]); - unsigned int i; + if (for_gcse) + return REGNO (x) == REGNO (y); + else + { + unsigned int regno = REGNO (y); + unsigned int i; + unsigned int endregno + = regno + (regno >= FIRST_PSEUDO_REGISTER ? 1 + : hard_regno_nregs[regno][GET_MODE (y)]); - /* If the quantities are not the same, the expressions are not - equivalent. 
If there are and we are not to validate, they - are equivalent. Otherwise, ensure all regs are up-to-date. */ + /* If the quantities are not the same, the expressions are not + equivalent. If there are and we are not to validate, they + are equivalent. Otherwise, ensure all regs are up-to-date. */ - if (REG_QTY (REGNO (x)) != REG_QTY (regno)) - return 0; + if (REG_QTY (REGNO (x)) != REG_QTY (regno)) + return 0; + + if (! validate) + return 1; + + for (i = regno; i < endregno; i++) + if (REG_IN_TABLE (i) != REG_TICK (i)) + return 0; - if (! validate) return 1; + } - for (i = regno; i < endregno; i++) - if (REG_IN_TABLE (i) != REG_TICK (i)) + case MEM: + if (for_gcse) + { + /* Can't merge two expressions in different alias sets, since we + can decide that the expression is transparent in a block when + it isn't, due to it being set with the different alias set. */ + if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y)) return 0; - return 1; - } + /* A volatile mem should not be considered equivalent to any + other. */ + if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y)) + return 0; + } + break; /* For commutative operations, check both orders. */ case PLUS: @@ -2469,13 +2497,14 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) case XOR: case NE: case EQ: - return ((exp_equiv_p (XEXP (x, 0), XEXP (y, 0), validate, equal_values) + return ((exp_equiv_p (XEXP (x, 0), XEXP (y, 0), + validate, for_gcse) && exp_equiv_p (XEXP (x, 1), XEXP (y, 1), - validate, equal_values)) + validate, for_gcse)) || (exp_equiv_p (XEXP (x, 0), XEXP (y, 1), - validate, equal_values) + validate, for_gcse) && exp_equiv_p (XEXP (x, 1), XEXP (y, 0), - validate, equal_values))); + validate, for_gcse))); case ASM_OPERANDS: /* We don't use the generic code below because we want to @@ -2498,7 +2527,7 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) if (! exp_equiv_p (ASM_OPERANDS_INPUT (x, i), ASM_OPERANDS_INPUT (y, i), - validate, equal_values) + validate, for_gcse) || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i), ASM_OPERANDS_INPUT_CONSTRAINT (y, i))) return 0; @@ -2511,7 +2540,7 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) } /* Compare the elements. If any pair of corresponding elements - fail to match, return 0 for the whole things. */ + fail to match, return 0 for the whole thing. */ fmt = GET_RTX_FORMAT (code); for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) @@ -2519,7 +2548,8 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) switch (fmt[i]) { case 'e': - if (! exp_equiv_p (XEXP (x, i), XEXP (y, i), validate, equal_values)) + if (! exp_equiv_p (XEXP (x, i), XEXP (y, i), + validate, for_gcse)) return 0; break; @@ -2528,7 +2558,7 @@ exp_equiv_p (rtx x, rtx y, int validate, int equal_values) return 0; for (j = 0; j < XVECLEN (x, i); j++) if (! exp_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j), - validate, equal_values)) + validate, for_gcse)) return 0; break; @@ -2827,7 +2857,7 @@ find_best_addr (rtx insn, rtx *loc, enum machine_mode mode) if (! p->flag) { if ((REG_P (p->exp) - || exp_equiv_p (p->exp, p->exp, 1, 0)) + || exp_equiv_p (p->exp, p->exp, 1, false)) && ((exp_cost = address_cost (p->exp, mode)) < best_addr_cost || (exp_cost == best_addr_cost && ((p->cost + 1) >> 1) > best_rtx_cost))) @@ -2903,7 +2933,7 @@ find_best_addr (rtx insn, rtx *loc, enum machine_mode mode) p = p->next_same_value, count++) if (! 
p->flag && (REG_P (p->exp) - || exp_equiv_p (p->exp, p->exp, 1, 0))) + || exp_equiv_p (p->exp, p->exp, 1, false))) { rtx new = simplify_gen_binary (GET_CODE (*loc), Pmode, p->exp, op1); @@ -3012,8 +3042,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2, if (x == 0) /* Look up ARG1 in the hash table and see if it has an equivalence that lets us see what is being compared. */ - p = lookup (arg1, safe_hash (arg1, GET_MODE (arg1)) & HASH_MASK, - GET_MODE (arg1)); + p = lookup (arg1, SAFE_HASH (arg1, GET_MODE (arg1)), GET_MODE (arg1)); if (p) { p = p->first_same_value; @@ -3038,7 +3067,7 @@ find_comparison_args (enum rtx_code code, rtx *parg1, rtx *parg2, #endif /* If the entry isn't valid, skip it. */ - if (! exp_equiv_p (p->exp, p->exp, 1, 0)) + if (! exp_equiv_p (p->exp, p->exp, 1, false)) continue; if (GET_CODE (p->exp) == COMPARE @@ -3235,7 +3264,7 @@ fold_rtx (rtx x, rtx insn) if (GET_CODE (elt->exp) == SUBREG && GET_MODE (SUBREG_REG (elt->exp)) == mode - && exp_equiv_p (elt->exp, elt->exp, 1, 0)) + && exp_equiv_p (elt->exp, elt->exp, 1, false)) return copy_rtx (SUBREG_REG (elt->exp)); } @@ -3264,8 +3293,6 @@ fold_rtx (rtx x, rtx insn) { struct table_elt *elt; - /* We can use HASH here since we know that canon_hash won't be - called. */ elt = lookup (folded_arg0, HASH (folded_arg0, GET_MODE (folded_arg0)), GET_MODE (folded_arg0)); @@ -3370,7 +3397,7 @@ fold_rtx (rtx x, rtx insn) && GET_MODE (SUBREG_REG (elt->exp)) == mode && (GET_MODE_SIZE (GET_MODE (folded_arg0)) <= UNITS_PER_WORD) - && exp_equiv_p (elt->exp, elt->exp, 1, 0)) + && exp_equiv_p (elt->exp, elt->exp, 1, false)) new = copy_rtx (SUBREG_REG (elt->exp)); if (new) @@ -3829,11 +3856,11 @@ fold_rtx (rtx x, rtx insn) && (REG_QTY (REGNO (folded_arg0)) == REG_QTY (REGNO (folded_arg1)))) || ((p0 = lookup (folded_arg0, - (safe_hash (folded_arg0, mode_arg0) - & HASH_MASK), mode_arg0)) + SAFE_HASH (folded_arg0, mode_arg0), + mode_arg0)) && (p1 = lookup (folded_arg1, - (safe_hash (folded_arg1, mode_arg0) - & HASH_MASK), mode_arg0)) + SAFE_HASH (folded_arg1, mode_arg0), + mode_arg0)) && p0->first_same_value == p1->first_same_value)) { /* Sadly two equal NaNs are not equivalent. */ @@ -4007,8 +4034,7 @@ fold_rtx (rtx x, rtx insn) { rtx new_const = GEN_INT (-INTVAL (const_arg1)); struct table_elt *p - = lookup (new_const, safe_hash (new_const, mode) & HASH_MASK, - mode); + = lookup (new_const, SAFE_HASH (new_const, mode), mode); if (p) for (p = p->first_same_value; p; p = p->next_same_value) @@ -4195,7 +4221,7 @@ equiv_constant (rtx x) if (CONSTANT_P (x)) return x; - elt = lookup (x, safe_hash (x, GET_MODE (x)) & HASH_MASK, GET_MODE (x)); + elt = lookup (x, SAFE_HASH (x, GET_MODE (x)), GET_MODE (x)); if (elt == 0) return 0; @@ -5182,7 +5208,7 @@ cse_insn (rtx insn, rtx libcall_insn) /* If the expression is not valid, ignore it. Then we do not have to check for validity below. In most cases, we can use `rtx_equal_p', since canonicalization has already been done. */ - if (code != REG && ! exp_equiv_p (p->exp, p->exp, 1, 0)) + if (code != REG && ! exp_equiv_p (p->exp, p->exp, 1, false)) continue; /* Also skip paradoxical subregs, unless that's what we're @@ -5279,7 +5305,7 @@ cse_insn (rtx insn, rtx libcall_insn) /* Skip invalid entries. */ while (elt && !REG_P (elt->exp) - && ! exp_equiv_p (elt->exp, elt->exp, 1, 0)) + && ! 
exp_equiv_p (elt->exp, elt->exp, 1, false)) elt = elt->next_same_value; /* A paradoxical subreg would be bad here: it'll be the right @@ -6006,7 +6032,7 @@ cse_insn (rtx insn, rtx libcall_insn) /* Ignore invalid entries. */ if (!REG_P (elt->exp) - && ! exp_equiv_p (elt->exp, elt->exp, 1, 0)) + && ! exp_equiv_p (elt->exp, elt->exp, 1, false)) continue; /* We may have already been playing subreg games. If the @@ -6059,7 +6085,7 @@ cse_insn (rtx insn, rtx libcall_insn) /* Ignore invalid entries. */ while (classp && !REG_P (classp->exp) - && ! exp_equiv_p (classp->exp, classp->exp, 1, 0)) + && ! exp_equiv_p (classp->exp, classp->exp, 1, false)) classp = classp->next_same_value; } } diff --git a/gcc/cselib.c b/gcc/cselib.c index 6fd4317..bf50dca 100644 --- a/gcc/cselib.c +++ b/gcc/cselib.c @@ -55,7 +55,7 @@ static int discard_useless_locs (void **, void *); static int discard_useless_values (void **, void *); static void remove_useless_values (void); static rtx wrap_constant (enum machine_mode, rtx); -static unsigned int hash_rtx (rtx, enum machine_mode, int); +static unsigned int cselib_hash_rtx (rtx, enum machine_mode, int); static cselib_val *new_cselib_val (unsigned int, enum machine_mode); static void add_mem_for_addr (cselib_val *, cselib_val *, rtx); static cselib_val *cselib_lookup_mem (rtx, int); @@ -257,8 +257,8 @@ entry_and_rtx_equal_p (const void *entry, const void *x_arg) } /* The hash function for our hash table. The value is always computed with - hash_rtx when adding an element; this function just extracts the hash - value from a cselib_val structure. */ + cselib_hash_rtx when adding an element; this function just extracts the + hash value from a cselib_val structure. */ static hashval_t get_value_hash (const void *entry) @@ -554,7 +554,7 @@ wrap_constant (enum machine_mode mode, rtx x) otherwise the mode of X is used. */ static unsigned int -hash_rtx (rtx x, enum machine_mode mode, int create) +cselib_hash_rtx (rtx x, enum machine_mode mode, int create) { cselib_val *e; int i, j; @@ -600,7 +600,7 @@ hash_rtx (rtx x, enum machine_mode mode, int create) for (i = 0; i < units; ++i) { elt = CONST_VECTOR_ELT (x, i); - hash += hash_rtx (elt, GET_MODE (elt), 0); + hash += cselib_hash_rtx (elt, GET_MODE (elt), 0); } return hash; @@ -646,7 +646,7 @@ hash_rtx (rtx x, enum machine_mode mode, int create) if (fmt[i] == 'e') { rtx tem = XEXP (x, i); - unsigned int tem_hash = hash_rtx (tem, 0, create); + unsigned int tem_hash = cselib_hash_rtx (tem, 0, create); if (tem_hash == 0) return 0; @@ -656,7 +656,7 @@ hash_rtx (rtx x, enum machine_mode mode, int create) else if (fmt[i] == 'E') for (j = 0; j < XVECLEN (x, i); j++) { - unsigned int tem_hash = hash_rtx (XVECEXP (x, i, j), 0, create); + unsigned int tem_hash = cselib_hash_rtx (XVECEXP (x, i, j), 0, create); if (tem_hash == 0) return 0; @@ -926,7 +926,7 @@ cselib_lookup (rtx x, enum machine_mode mode, int create) if (MEM_P (x)) return cselib_lookup_mem (x, create); - hashval = hash_rtx (x, mode, create); + hashval = cselib_hash_rtx (x, mode, create); /* Can't even create if hashing is not possible. */ if (! hashval) return 0; diff --git a/gcc/gcse.c b/gcc/gcse.c index b9a7874..16d76fe 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -495,11 +495,12 @@ static sbitmap *reg_set_in_block; /* Array, indexed by basic block number for a list of insns which modify memory within that block. */ static rtx * modify_mem_list; -bitmap modify_mem_list_set; +static bitmap modify_mem_list_set; /* This array parallels modify_mem_list, but is kept canonicalized. 
*/ static rtx * canon_modify_mem_list; -bitmap canon_modify_mem_list_set; +static bitmap canon_modify_mem_list_set; + /* Various variables for statistics gathering. */ /* Memory used in a pass. @@ -564,8 +565,6 @@ static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, struct hash_table *); static void insert_set_in_table (rtx, rtx, struct hash_table *); static unsigned int hash_expr (rtx, enum machine_mode, int *, int); -static unsigned int hash_expr_1 (rtx, enum machine_mode, int *); -static unsigned int hash_string_1 (const char *); static unsigned int hash_set (int, int); static int expr_equiv_p (rtx, rtx); static void record_last_reg_set_info (rtx, int); @@ -576,7 +575,6 @@ static void alloc_hash_table (int, struct hash_table *, int); static void free_hash_table (struct hash_table *); static void compute_hash_table_work (struct hash_table *); static void dump_hash_table (FILE *, const char *, struct hash_table *); -static struct expr *lookup_expr (rtx, struct hash_table *); static struct expr *lookup_set (unsigned int, struct hash_table *); static struct expr *next_set (unsigned int, struct expr *); static void reset_opr_set_tables (void); @@ -1462,9 +1460,7 @@ oprs_available_p (rtx x, rtx insn) MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found or if the expression contains something we don't want to insert in the table. HASH_TABLE_SIZE is - the current size of the hash table to be probed. - - ??? One might want to merge this with canon_hash. Later. */ + the current size of the hash table to be probed. */ static unsigned int hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p, @@ -1474,208 +1470,11 @@ hash_expr (rtx x, enum machine_mode mode, int *do_not_record_p, *do_not_record_p = 0; - hash = hash_expr_1 (x, mode, do_not_record_p); + hash = hash_rtx (x, mode, do_not_record_p, + NULL, /*have_reg_qty=*/false); return hash % hash_table_size; } -/* Hash a string. Just add its bytes up. */ - -static inline unsigned -hash_string_1 (const char *ps) -{ - unsigned hash = 0; - const unsigned char *p = (const unsigned char *) ps; - - if (p) - while (*p) - hash += *p++; - - return hash; -} - -/* Subroutine of hash_expr to do the actual work. */ - -static unsigned int -hash_expr_1 (rtx x, enum machine_mode mode, int *do_not_record_p) -{ - int i, j; - unsigned hash = 0; - enum rtx_code code; - const char *fmt; - - if (x == 0) - return hash; - - /* Used to turn recursion into iteration. We can't rely on GCC's - tail-recursion elimination since we need to keep accumulating values - in HASH. */ - repeat: - - code = GET_CODE (x); - switch (code) - { - case REG: - hash += ((unsigned int) REG << 7) + REGNO (x); - return hash; - - case CONST_INT: - hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode - + (unsigned int) INTVAL (x)); - return hash; - - case CONST_DOUBLE: - /* This is like the general case, except that it only counts - the integers representing the constant. 
*/ - hash += (unsigned int) code + (unsigned int) GET_MODE (x); - if (GET_MODE (x) != VOIDmode) - for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++) - hash += (unsigned int) XWINT (x, i); - else - hash += ((unsigned int) CONST_DOUBLE_LOW (x) - + (unsigned int) CONST_DOUBLE_HIGH (x)); - return hash; - - case CONST_VECTOR: - { - int units; - rtx elt; - - units = CONST_VECTOR_NUNITS (x); - - for (i = 0; i < units; ++i) - { - elt = CONST_VECTOR_ELT (x, i); - hash += hash_expr_1 (elt, GET_MODE (elt), do_not_record_p); - } - - return hash; - } - - /* Assume there is only one rtx object for any given label. */ - case LABEL_REF: - /* We don't hash on the address of the CODE_LABEL to avoid bootstrap - differences and differences between each stage's debugging dumps. */ - hash += (((unsigned int) LABEL_REF << 7) - + CODE_LABEL_NUMBER (XEXP (x, 0))); - return hash; - - case SYMBOL_REF: - { - /* Don't hash on the symbol's address to avoid bootstrap differences. - Different hash values may cause expressions to be recorded in - different orders and thus different registers to be used in the - final assembler. This also avoids differences in the dump files - between various stages. */ - unsigned int h = 0; - const unsigned char *p = (const unsigned char *) XSTR (x, 0); - - while (*p) - h += (h << 7) + *p++; /* ??? revisit */ - - hash += ((unsigned int) SYMBOL_REF << 7) + h; - return hash; - } - - case MEM: - if (MEM_VOLATILE_P (x)) - { - *do_not_record_p = 1; - return 0; - } - - hash += (unsigned int) MEM; - /* We used alias set for hashing, but this is not good, since the alias - set may differ in -fprofile-arcs and -fbranch-probabilities compilation - causing the profiles to fail to match. */ - x = XEXP (x, 0); - goto repeat; - - case PRE_DEC: - case PRE_INC: - case POST_DEC: - case POST_INC: - case PC: - case CC0: - case CALL: - case UNSPEC_VOLATILE: - *do_not_record_p = 1; - return 0; - - case ASM_OPERANDS: - if (MEM_VOLATILE_P (x)) - { - *do_not_record_p = 1; - return 0; - } - else - { - /* We don't want to take the filename and line into account. */ - hash += (unsigned) code + (unsigned) GET_MODE (x) - + hash_string_1 (ASM_OPERANDS_TEMPLATE (x)) - + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x)) - + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x); - - if (ASM_OPERANDS_INPUT_LENGTH (x)) - { - for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++) - { - hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i), - GET_MODE (ASM_OPERANDS_INPUT (x, i)), - do_not_record_p) - + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT - (x, i))); - } - - hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); - x = ASM_OPERANDS_INPUT (x, 0); - mode = GET_MODE (x); - goto repeat; - } - return hash; - } - - default: - break; - } - - hash += (unsigned) code + (unsigned) GET_MODE (x); - for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) - { - if (fmt[i] == 'e') - { - /* If we are about to do the last recursive call - needed at this level, change it into iteration. - This function is called enough to be worth it. 
*/ - if (i == 0) - { - x = XEXP (x, i); - goto repeat; - } - - hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p); - if (*do_not_record_p) - return 0; - } - - else if (fmt[i] == 'E') - for (j = 0; j < XVECLEN (x, i); j++) - { - hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p); - if (*do_not_record_p) - return 0; - } - - else if (fmt[i] == 's') - hash += hash_string_1 (XSTR (x, i)); - else if (fmt[i] == 'i') - hash += (unsigned int) XINT (x, i); - else - abort (); - } - - return hash; -} - /* Hash a set of register REGNO. Sets are hashed on the register that is set. This simplifies the PRE copy @@ -1692,148 +1491,12 @@ hash_set (int regno, int hash_table_size) return hash % hash_table_size; } -/* Return nonzero if exp1 is equivalent to exp2. - ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */ +/* Return nonzero if exp1 is equivalent to exp2. */ static int expr_equiv_p (rtx x, rtx y) { - int i, j; - enum rtx_code code; - const char *fmt; - - if (x == y) - return 1; - - if (x == 0 || y == 0) - return 0; - - code = GET_CODE (x); - if (code != GET_CODE (y)) - return 0; - - /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */ - if (GET_MODE (x) != GET_MODE (y)) - return 0; - - switch (code) - { - case PC: - case CC0: - case CONST_INT: - return 0; - - case LABEL_REF: - return XEXP (x, 0) == XEXP (y, 0); - - case SYMBOL_REF: - return XSTR (x, 0) == XSTR (y, 0); - - case REG: - return REGNO (x) == REGNO (y); - - case MEM: - /* Can't merge two expressions in different alias sets, since we can - decide that the expression is transparent in a block when it isn't, - due to it being set with the different alias set. */ - if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y)) - return 0; - - /* A volatile mem should not be considered equivalent to any other. */ - if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y)) - return 0; - break; - - /* For commutative operations, check both orders. */ - case PLUS: - case MULT: - case AND: - case IOR: - case XOR: - case NE: - case EQ: - return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0)) - && expr_equiv_p (XEXP (x, 1), XEXP (y, 1))) - || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1)) - && expr_equiv_p (XEXP (x, 1), XEXP (y, 0)))); - - case ASM_OPERANDS: - /* We don't use the generic code below because we want to - disregard filename and line numbers. */ - - /* A volatile asm isn't equivalent to any other. */ - if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y)) - return 0; - - if (GET_MODE (x) != GET_MODE (y) - || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y)) - || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x), - ASM_OPERANDS_OUTPUT_CONSTRAINT (y)) - || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y) - || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y)) - return 0; - - if (ASM_OPERANDS_INPUT_LENGTH (x)) - { - for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) - if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i), - ASM_OPERANDS_INPUT (y, i)) - || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i), - ASM_OPERANDS_INPUT_CONSTRAINT (y, i))) - return 0; - } - - return 1; - - default: - break; - } - - /* Compare the elements. If any pair of corresponding elements - fail to match, return 0 for the whole thing. */ - - fmt = GET_RTX_FORMAT (code); - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - switch (fmt[i]) - { - case 'e': - if (! expr_equiv_p (XEXP (x, i), XEXP (y, i))) - return 0; - break; - - case 'E': - if (XVECLEN (x, i) != XVECLEN (y, i)) - return 0; - for (j = 0; j < XVECLEN (x, i); j++) - if (! 
expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j))) - return 0; - break; - - case 's': - if (strcmp (XSTR (x, i), XSTR (y, i))) - return 0; - break; - - case 'i': - if (XINT (x, i) != XINT (y, i)) - return 0; - break; - - case 'w': - if (XWINT (x, i) != XWINT (y, i)) - return 0; - break; - - case '0': - break; - - default: - abort (); - } - } - - return 1; + return exp_equiv_p (x, y, 0, true); } /* Insert expression X in INSN in the hash TABLE. @@ -2556,28 +2219,6 @@ compute_hash_table (struct hash_table *table) /* Expression tracking support. */ -/* Lookup pattern PAT in the expression TABLE. - The result is a pointer to the table entry, or NULL if not found. */ - -static struct expr * -lookup_expr (rtx pat, struct hash_table *table) -{ - int do_not_record_p; - unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p, - table->size); - struct expr *expr; - - if (do_not_record_p) - return NULL; - - expr = table->table[hash]; - - while (expr && ! expr_equiv_p (expr->expr, pat)) - expr = expr->next_same_hash; - - return expr; -} - /* Lookup REGNO in the set TABLE. The result is a pointer to the table entry, or NULL if not found. */ @@ -5426,7 +5067,8 @@ ldst_entry (rtx x) struct ls_expr * ptr; unsigned int hash; - hash = hash_expr_1 (x, GET_MODE (x), & do_not_record_p); + hash = hash_rtx (x, GET_MODE (x), &do_not_record_p, + NULL, /*have_reg_qty=*/false); for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next) if (ptr->hash_index == hash && expr_equiv_p (ptr->pattern, x)) @@ -6945,654 +6587,4 @@ is_too_expensive (const char *pass) return false; } -/* The following code implements gcse after reload, the purpose of this - pass is to cleanup redundant loads generated by reload and other - optimizations that come after gcse. It searches for simple inter-block - redundancies and tries to eliminate them by adding moves and loads - in cold places. */ - -/* The following structure holds the information about the occurrences of - the redundant instructions. */ -struct unoccr -{ - struct unoccr *next; - edge pred; - rtx insn; -}; - -static bool reg_used_on_edge (rtx, edge); -static rtx reg_set_between_after_reload_p (rtx, rtx, rtx); -static rtx reg_used_between_after_reload_p (rtx, rtx, rtx); -static rtx get_avail_load_store_reg (rtx); -static bool is_jump_table_basic_block (basic_block); -static bool bb_has_well_behaved_predecessors (basic_block); -static struct occr* get_bb_avail_insn (basic_block, struct occr *); -static void hash_scan_set_after_reload (rtx, rtx, struct hash_table *); -static void compute_hash_table_after_reload (struct hash_table *); -static void eliminate_partially_redundant_loads (basic_block, - rtx, - struct expr *); -static void gcse_after_reload (void); -static struct occr* get_bb_avail_insn (basic_block, struct occr *); -void gcse_after_reload_main (rtx, FILE *); - - -/* Check if register REG is used in any insn waiting to be inserted on E. - Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p - with PREV(insn),NEXT(insn) instead of calling - reg_overlap_mentioned_p. */ - -static bool -reg_used_on_edge (rtx reg, edge e) -{ - rtx insn; - - for (insn = e->insns.r; insn; insn = NEXT_INSN (insn)) - if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn))) - return true; - - return false; -} - -/* Return the insn that sets register REG or clobbers it in between - FROM_INSN and TO_INSN (exclusive of those two). - Just like reg_set_between but for hard registers and not pseudos. 
*/ - -static rtx -reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) -{ - rtx insn; - int regno; - - if (! REG_P (reg)) - abort (); - regno = REGNO (reg); - - /* We are called after register allocation. */ - if (regno >= FIRST_PSEUDO_REGISTER) - abort (); - - if (from_insn == to_insn) - return NULL_RTX; - - for (insn = NEXT_INSN (from_insn); - insn != to_insn; - insn = NEXT_INSN (insn)) - { - if (INSN_P (insn)) - { - if (FIND_REG_INC_NOTE (insn, reg) - || (CALL_P (insn) - && call_used_regs[regno]) - || find_reg_fusage (insn, CLOBBER, reg)) - return insn; - } - if (set_of (reg, insn) != NULL_RTX) - return insn; - } - return NULL_RTX; -} - -/* Return the insn that uses register REG in between FROM_INSN and TO_INSN - (exclusive of those two). Similar to reg_used_between but for hard - registers and not pseudos. */ - -static rtx -reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) -{ - rtx insn; - int regno; - - if (! REG_P (reg)) - return to_insn; - regno = REGNO (reg); - - /* We are called after register allocation. */ - if (regno >= FIRST_PSEUDO_REGISTER) - abort (); - if (from_insn == to_insn) - return NULL_RTX; - - for (insn = NEXT_INSN (from_insn); - insn != to_insn; - insn = NEXT_INSN (insn)) - if (INSN_P (insn) - && (reg_overlap_mentioned_p (reg, PATTERN (insn)) - || (CALL_P (insn) - && call_used_regs[regno]) - || find_reg_fusage (insn, USE, reg) - || find_reg_fusage (insn, CLOBBER, reg))) - return insn; - return NULL_RTX; -} - -/* Return the loaded/stored register of a load/store instruction. */ - -static rtx -get_avail_load_store_reg (rtx insn) -{ - if (REG_P (SET_DEST (PATTERN (insn)))) /* A load. */ - return SET_DEST(PATTERN(insn)); - if (REG_P (SET_SRC (PATTERN (insn)))) /* A store. */ - return SET_SRC (PATTERN (insn)); - abort (); -} - -/* Don't handle ABNORMAL edges or jump tables. */ - -static bool -is_jump_table_basic_block (basic_block bb) -{ - rtx insn = BB_END (bb); - - if (JUMP_TABLE_DATA_P (insn)) - return true; - return false; -} - -/* Return nonzero if the predecessors of BB are "well behaved". */ - -static bool -bb_has_well_behaved_predecessors (basic_block bb) -{ - edge pred; - - if (! bb->pred) - return false; - for (pred = bb->pred; pred != NULL; pred = pred->pred_next) - if (((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred)) - || is_jump_table_basic_block (pred->src)) - return false; - return true; -} - - -/* Search for the occurrences of expression in BB. */ - -static struct occr* -get_bb_avail_insn (basic_block bb, struct occr *occr) -{ - for (; occr != NULL; occr = occr->next) - if (BLOCK_FOR_INSN (occr->insn)->index == bb->index) - return occr; - return NULL; -} - -/* Perform partial GCSE pass after reload, try to eliminate redundant loads - created by the reload pass. We try to look for a full or partial - redundant loads fed by one or more loads/stores in predecessor BBs, - and try adding loads to make them fully redundant. We also check if - it's worth adding loads to be able to delete the redundant load. - - Algorithm: - 1. Build available expressions hash table: - For each load/store instruction, if the loaded/stored memory didn't - change until the end of the basic block add this memory expression to - the hash table. - 2. Perform Redundancy elimination: - For each load instruction do the following: - perform partial redundancy elimination, check if it's worth adding - loads to make the load fully redundant. If so add loads and - register copies and delete the load. 
- - Future enhancement: - if loaded register is used/defined between load and some store, - look for some other free register between load and all its stores, - and replace load with a copy from this register to the loaded - register. */ - - -/* This handles the case where several stores feed a partially redundant - load. It checks if the redundancy elimination is possible and if it's - worth it. */ - -static void -eliminate_partially_redundant_loads (basic_block bb, rtx insn, - struct expr *expr) -{ - edge pred; - rtx avail_insn = NULL_RTX; - rtx avail_reg; - rtx dest, pat; - struct occr *a_occr; - struct unoccr *occr, *avail_occrs = NULL; - struct unoccr *unoccr, *unavail_occrs = NULL; - int npred_ok = 0; - gcov_type ok_count = 0; /* Redundant load execution count. */ - gcov_type critical_count = 0; /* Execution count of critical edges. */ - - /* The execution count of the loads to be added to make the - load fully redundant. */ - gcov_type not_ok_count = 0; - basic_block pred_bb; - - pat = PATTERN (insn); - dest = SET_DEST (pat); - /* Check that the loaded register is not used, set, or killed from the - beginning of the block. */ - if (reg_used_between_after_reload_p (dest, - PREV_INSN (BB_HEAD (bb)), insn) - || reg_set_between_after_reload_p (dest, - PREV_INSN (BB_HEAD (bb)), insn)) - return; - - /* Check potential for replacing load with copy for predecessors. */ - for (pred = bb->pred; pred; pred = pred->pred_next) - { - rtx next_pred_bb_end; - - avail_insn = NULL_RTX; - pred_bb = pred->src; - next_pred_bb_end = NEXT_INSN (BB_END (pred_bb)); - for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr; - a_occr = get_bb_avail_insn (pred_bb, a_occr->next)) - { - /* Check if the loaded register is not used. */ - avail_insn = a_occr->insn; - if (! (avail_reg = get_avail_load_store_reg (avail_insn))) - abort (); - /* Make sure we can generate a move from register avail_reg to - dest. */ - extract_insn (gen_move_insn (copy_rtx (dest), - copy_rtx (avail_reg))); - if (! constrain_operands (1) - || reg_killed_on_edge (avail_reg, pred) - || reg_used_on_edge (dest, pred)) - { - avail_insn = NULL; - continue; - } - if (! reg_set_between_after_reload_p (avail_reg, avail_insn, - next_pred_bb_end)) - /* AVAIL_INSN remains non-null. */ - break; - else - avail_insn = NULL; - } - if (avail_insn != NULL_RTX) - { - npred_ok++; - ok_count += pred->count; - if (EDGE_CRITICAL_P (pred)) - critical_count += pred->count; - occr = gmalloc (sizeof (struct unoccr)); - occr->insn = avail_insn; - occr->pred = pred; - occr->next = avail_occrs; - avail_occrs = occr; - } - else - { - not_ok_count += pred->count; - if (EDGE_CRITICAL_P (pred)) - critical_count += pred->count; - unoccr = gmalloc (sizeof (struct unoccr)); - unoccr->insn = NULL_RTX; - unoccr->pred = pred; - unoccr->next = unavail_occrs; - unavail_occrs = unoccr; - } - } - - if (npred_ok == 0 /* No load can be replaced by copy. */ - || (optimize_size && npred_ok > 1)) /* Prevent exploding the code. */ - goto cleanup; - - /* Check if it's worth applying the partial redundancy elimination. */ - if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count) - goto cleanup; - - if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count) - goto cleanup; - - /* Generate moves to the loaded register from where - the memory is available. */ - for (occr = avail_occrs; occr; occr = occr->next) - { - avail_insn = occr->insn; - pred = occr->pred; - /* Set avail_reg to be the register having the value of the - memory. 
*/ - avail_reg = get_avail_load_store_reg (avail_insn); - if (! avail_reg) - abort (); - - insert_insn_on_edge (gen_move_insn (copy_rtx (dest), - copy_rtx (avail_reg)), - pred); - - if (gcse_file) - fprintf (gcse_file, - "GCSE AFTER reload generating move from %d to %d on \ - edge from %d to %d\n", - REGNO (avail_reg), - REGNO (dest), - pred->src->index, - pred->dest->index); - } - - /* Regenerate loads where the memory is unavailable. */ - for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next) - { - pred = unoccr->pred; - insert_insn_on_edge (copy_insn (PATTERN (insn)), pred); - - if (gcse_file) - fprintf (gcse_file, - "GCSE AFTER reload: generating on edge from %d to %d\ - a copy of load:\n", - pred->src->index, - pred->dest->index); - } - - /* Delete the insn if it is not available in this block and mark it - for deletion if it is available. If insn is available it may help - discover additional redundancies, so mark it for later deletion.*/ - for (a_occr = get_bb_avail_insn (bb, expr->avail_occr); - a_occr && (a_occr->insn != insn); - a_occr = get_bb_avail_insn (bb, a_occr->next)); - - if (!a_occr) - delete_insn (insn); - else - a_occr->deleted_p = 1; - -cleanup: - - while (unavail_occrs) - { - struct unoccr *temp = unavail_occrs->next; - free (unavail_occrs); - unavail_occrs = temp; - } - - while (avail_occrs) - { - struct unoccr *temp = avail_occrs->next; - free (avail_occrs); - avail_occrs = temp; - } -} - -/* Performing the redundancy elimination as described before. */ - -static void -gcse_after_reload (void) -{ - unsigned int i; - rtx insn; - basic_block bb; - struct expr *expr; - struct occr *occr; - - /* Note we start at block 1. */ - - if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR) - return; - - FOR_BB_BETWEEN (bb, - ENTRY_BLOCK_PTR->next_bb->next_bb, - EXIT_BLOCK_PTR, - next_bb) - { - if (! bb_has_well_behaved_predecessors (bb)) - continue; - - /* Do not try this optimization on cold basic blocks. */ - if (probably_cold_bb_p (bb)) - continue; - - reset_opr_set_tables (); - - for (insn = BB_HEAD (bb); - insn != NULL - && insn != NEXT_INSN (BB_END (bb)); - insn = NEXT_INSN (insn)) - { - /* Is it a load - of the form (set (reg) (mem))? */ - if (NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) == SET - && REG_P (SET_DEST (PATTERN (insn))) - && MEM_P (SET_SRC (PATTERN (insn)))) - { - rtx pat = PATTERN (insn); - rtx src = SET_SRC (pat); - struct expr *expr; - - if (general_operand (src, GET_MODE (src)) - /* Is the expression recorded? */ - && (expr = lookup_expr (src, &expr_hash_table)) != NULL - /* Are the operands unchanged since the start of the - block? */ - && oprs_not_set_p (src, insn) - && ! MEM_VOLATILE_P (src) - && GET_MODE (src) != BLKmode - && !(flag_non_call_exceptions && may_trap_p (src)) - && !side_effects_p (src)) - { - /* We now have a load (insn) and an available memory at - its BB start (expr). Try to remove the loads if it is - redundant. */ - eliminate_partially_redundant_loads (bb, insn, expr); - } - } - - /* Keep track of everything modified by this insn. */ - if (INSN_P (insn)) - mark_oprs_set (insn); - } - } - - commit_edge_insertions (); - - /* Go over the expression hash table and delete insns that were - marked for later deletion. */ - for (i = 0; i < expr_hash_table.size; i++) - { - for (expr = expr_hash_table.table[i]; - expr != NULL; - expr = expr->next_same_hash) - for (occr = expr->avail_occr; occr; occr = occr->next) - if (occr->deleted_p) - delete_insn (occr->insn); - } -} - -/* Scan pattern PAT of INSN and add an entry to the hash TABLE. 
- After reload we are interested in loads/stores only. */ - -static void -hash_scan_set_after_reload (rtx pat, rtx insn, struct hash_table *table) -{ - rtx src = SET_SRC (pat); - rtx dest = SET_DEST (pat); - - if (! MEM_P (src) && ! MEM_P (dest)) - return; - - if (REG_P (dest)) - { - if (/* Don't GCSE something if we can't do a reg/reg copy. */ - can_copy_p (GET_MODE (dest)) - /* GCSE commonly inserts instruction after the insn. We can't - do that easily for EH_REGION notes so disable GCSE on these - for now. */ - && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX) - /* Is SET_SRC something we want to gcse? */ - && general_operand (src, GET_MODE (src)) - /* Don't CSE a nop. */ - && ! set_noop_p (pat) - && ! JUMP_P (insn)) - { - /* An expression is not available if its operands are - subsequently modified, including this insn. */ - if (oprs_available_p (src, insn)) - insert_expr_in_table (src, GET_MODE (dest), insn, 0, 1, table); - } - } - else if (REG_P (src)) - { - /* Only record sets of pseudo-regs in the hash table. */ - if (/* Don't GCSE something if we can't do a reg/reg copy. */ - can_copy_p (GET_MODE (src)) - /* GCSE commonly inserts instruction after the insn. We can't - do that easily for EH_REGION notes so disable GCSE on these - for now. */ - && ! find_reg_note (insn, REG_EH_REGION, NULL_RTX) - /* Is SET_DEST something we want to gcse? */ - && general_operand (dest, GET_MODE (dest)) - /* Don't CSE a nop. */ - && ! set_noop_p (pat) - &&! JUMP_P (insn) - && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest))) - /* Check if the memory expression is killed after insn. */ - && ! load_killed_in_block_p (BLOCK_FOR_INSN (insn), - INSN_CUID (insn) + 1, - dest, - 1) - && oprs_unchanged_p (XEXP (dest, 0), insn, 1)) - { - insert_expr_in_table (dest, GET_MODE (dest), insn, 0, 1, table); - } - } -} - - -/* Create hash table of memory expressions available at end of basic - blocks. */ - -static void -compute_hash_table_after_reload (struct hash_table *table) -{ - unsigned int i; - - table->set_p = 0; - - /* Initialize count of number of entries in hash table. */ - table->n_elems = 0; - memset ((char *) table->table, 0, - table->size * sizeof (struct expr *)); - - /* While we compute the hash table we also compute a bit array of which - registers are set in which blocks. */ - sbitmap_vector_zero (reg_set_in_block, last_basic_block); - - /* Re-cache any INSN_LIST nodes we have allocated. */ - clear_modify_mem_tables (); - - /* Some working arrays used to track first and last set in each block. */ - reg_avail_info = gmalloc (max_gcse_regno * sizeof (struct reg_avail_info)); - - for (i = 0; i < max_gcse_regno; ++i) - reg_avail_info[i].last_bb = NULL; - - FOR_EACH_BB (current_bb) - { - rtx insn; - unsigned int regno; - - /* First pass over the instructions records information used to - determine when registers and memory are first and last set. */ - for (insn = BB_HEAD (current_bb); - insn && insn != NEXT_INSN (BB_END (current_bb)); - insn = NEXT_INSN (insn)) - { - if (! 
INSN_P (insn)) - continue; - - if (CALL_P (insn)) - { - bool clobbers_all = false; - -#ifdef NON_SAVING_SETJMP - if (NON_SAVING_SETJMP - && find_reg_note (insn, REG_SETJMP, NULL_RTX)) - clobbers_all = true; -#endif - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (clobbers_all - || TEST_HARD_REG_BIT (regs_invalidated_by_call, - regno)) - record_last_reg_set_info (insn, regno); - - mark_call (insn); - } - - note_stores (PATTERN (insn), record_last_set_info, insn); - - if (GET_CODE (PATTERN (insn)) == SET) - { - rtx src, dest; - - src = SET_SRC (PATTERN (insn)); - dest = SET_DEST (PATTERN (insn)); - if (MEM_P (src) && auto_inc_p (XEXP (src, 0))) - { - regno = REGNO (XEXP (XEXP (src, 0), 0)); - record_last_reg_set_info (insn, regno); - } - if (MEM_P (dest) && auto_inc_p (XEXP (dest, 0))) - { - regno = REGNO (XEXP (XEXP (dest, 0), 0)); - record_last_reg_set_info (insn, regno); - } - } - } - - /* The next pass builds the hash table. */ - for (insn = BB_HEAD (current_bb); - insn && insn != NEXT_INSN (BB_END (current_bb)); - insn = NEXT_INSN (insn)) - if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) - if (! find_reg_note (insn, REG_LIBCALL, NULL_RTX)) - hash_scan_set_after_reload (PATTERN (insn), insn, table); - } - - free (reg_avail_info); - reg_avail_info = NULL; -} - - -/* Main entry point of the GCSE after reload - clean some redundant loads - due to spilling. */ - -void -gcse_after_reload_main (rtx f, FILE* file) -{ - gcse_subst_count = 0; - gcse_create_count = 0; - - gcse_file = file; - - gcc_obstack_init (&gcse_obstack); - bytes_used = 0; - - /* We need alias. */ - init_alias_analysis (); - - max_gcse_regno = max_reg_num (); - - alloc_reg_set_mem (max_gcse_regno); - alloc_gcse_mem (f); - alloc_hash_table (max_cuid, &expr_hash_table, 0); - compute_hash_table_after_reload (&expr_hash_table); - - if (gcse_file) - dump_hash_table (gcse_file, "Expression", &expr_hash_table); - - if (expr_hash_table.n_elems > 0) - gcse_after_reload (); - - free_hash_table (&expr_hash_table); - - free_gcse_mem (); - free_reg_set_mem (); - - /* We are finished with alias. */ - end_alias_analysis (); - - obstack_free (&gcse_obstack, NULL); -} - #include "gt-gcse.h" diff --git a/gcc/passes.c b/gcc/passes.c index e8a9322..da0beff 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -841,10 +841,10 @@ rest_of_handle_sched2 (void) static void rest_of_handle_gcse2 (void) { - timevar_push (TV_RELOAD_CSE_REGS); + timevar_push (TV_GCSE_AFTER_RELOAD); open_dump_file (DFI_gcse2, current_function_decl); - gcse_after_reload_main (get_insns (), dump_file); + gcse_after_reload_main (get_insns ()); rebuild_jump_labels (get_insns ()); delete_trivially_dead_insns (get_insns (), max_reg_num ()); close_dump_file (DFI_gcse2, print_rtl_with_bb, get_insns ()); @@ -855,7 +855,7 @@ rest_of_handle_gcse2 (void) verify_flow_info (); #endif - timevar_pop (TV_RELOAD_CSE_REGS); + timevar_pop (TV_GCSE_AFTER_RELOAD); } /* Register allocation pre-pass, to reduce number of moves necessary diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c new file mode 100644 index 0000000..a9d0344 --- /dev/null +++ b/gcc/postreload-gcse.c @@ -0,0 +1,1379 @@ +/* Post reload partially redundant load elimination + Copyright (C) 2004 + Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. 
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "toplev.h"
+
+#include "rtl.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "flags.h"
+#include "real.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "basic-block.h"
+#include "output.h"
+#include "function.h"
+#include "expr.h"
+#include "except.h"
+#include "intl.h"
+#include "obstack.h"
+#include "hashtab.h"
+#include "params.h"
+
+/* The following code implements gcse after reload.  The purpose of this
+   pass is to clean up redundant loads generated by reload and other
+   optimizations that come after gcse.  It searches for simple inter-block
+   redundancies and tries to eliminate them by adding moves and loads
+   in cold places.
+
+   Perform partially redundant load elimination, i.e. try to eliminate
+   redundant loads created by the reload pass.  We look for fully or
+   partially redundant loads fed by one or more loads/stores in
+   predecessor BBs, and try adding loads to make them fully redundant.
+   We also check if it's worth adding loads to be able to delete the
+   redundant load.
+
+   Algorithm:
+   1. Build available expressions hash table:
+       For each load/store instruction, if the loaded/stored memory didn't
+       change until the end of the basic block, add this memory expression
+       to the hash table.
+   2. Perform redundancy elimination:
+      For each load instruction do the following:
+	 perform partial redundancy elimination, check if it's worth adding
+	 loads to make the load fully redundant.  If so add loads and
+	 register copies and delete the load.
+   3. Delete instructions made redundant in step 2.
+
+   Future enhancement:
+     If the loaded register is used/defined between load and some store,
+     look for some other free register between load and all its stores,
+     and replace the load with a copy from this register to the loaded
+     register.
+*/
+
+
+/* Keep statistics of this pass.  */
+static struct
+{
+  int moves_inserted;
+  int copies_inserted;
+  int insns_deleted;
+} stats;
+
+/* We need to keep a hash table of expressions.  The table entries are of
+   type 'struct expr', and for each expression there is a singly-linked
+   list of occurrences.  */
+
+/* The table itself.  */
+static htab_t expr_table;
+
+/* Expression elements in the hash table.  */
+struct expr
+{
+  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
+  rtx expr;
+
+  /* The hash value for this entry.  */
+  hashval_t hash;
+
+  /* List of available occurrences in basic blocks of the function.  */
+  struct occr *avail_occr;
+};
+
+static struct obstack expr_obstack;
+
+/* Occurrence of an expression.
+   There is at most one occurrence per basic block.  If a pattern appears
+   more than once, the last appearance is used.  */
+
+struct occr
+{
+  /* Next occurrence of this expression.  */
+  struct occr *next;
+  /* The insn that computes the expression.  */
+  rtx insn;
+  /* Nonzero if this [anticipatable] occurrence has been deleted.
*/ + char deleted_p; +}; + +static struct obstack occr_obstack; + +/* The following structure holds the information about the occurrences of + the redundant instructions. */ +struct unoccr +{ + struct unoccr *next; + edge pred; + rtx insn; +}; + +static struct obstack unoccr_obstack; + +/* Array where each element is the CUID if the insn that last set the hard + register with the number of the element, since the start of the current + basic block. */ +static int *reg_avail_info; + +/* A list of insns that may modify memory within the current basic block. */ +struct modifies_mem +{ + rtx insn; + struct modifies_mem *next; +}; +static struct modifies_mem *modifies_mem_list; + +/* The modifies_mem structs also go on an obstack, only this obstack is + freed each time after completing the analysis or transformations on + a basic block. So we allocate a dummy modifies_mem_obstack_bottom + object on the obstack to keep track of the bottom of the obstack. */ +static struct obstack modifies_mem_obstack; +static struct modifies_mem *modifies_mem_obstack_bottom; + +/* Mapping of insn UIDs to CUIDs. + CUIDs are like UIDs except they increase monotonically in each basic + block, have no gaps, and only apply to real insns. */ +static int *uid_cuid; +#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)]) + + +/* Helpers for memory allocation/freeing. */ +static void alloc_mem (void); +static void free_mem (void); + +/* Support for hash table construction and transformations. */ +static bool oprs_unchanged_p (rtx, rtx, bool); +static void record_last_reg_set_info (rtx, int); +static void record_last_mem_set_info (rtx); +static void record_last_set_info (rtx, rtx, void *); +static void mark_call (rtx); +static void mark_set (rtx, rtx); +static void mark_clobber (rtx, rtx); +static void mark_oprs_set (rtx); + +static void find_mem_conflicts (rtx, rtx, void *); +static int load_killed_in_block_p (int, rtx, bool); +static void reset_opr_set_tables (void); + +/* Hash table support. */ +static hashval_t hash_expr (rtx, int *); +static hashval_t hash_expr_for_htab (const void *); +static int expr_equiv_p (const void *, const void *); +static void insert_expr_in_table (rtx, rtx); +static struct expr *lookup_expr_in_table (rtx); +static int dump_hash_table_entry (void **, void *); +static void dump_hash_table (FILE *); + +/* Helpers for eliminate_partially_redundant_load. */ +static bool reg_killed_on_edge (rtx, edge); +static bool reg_used_on_edge (rtx, edge); + +static rtx reg_set_between_after_reload_p (rtx, rtx, rtx); +static rtx reg_used_between_after_reload_p (rtx, rtx, rtx); +static rtx get_avail_load_store_reg (rtx); + +static bool bb_has_well_behaved_predecessors (basic_block); +static struct occr* get_bb_avail_insn (basic_block, struct occr *); +static void hash_scan_set (rtx); +static void compute_hash_table (void); + +/* The work horses of this pass. */ +static void eliminate_partially_redundant_load (basic_block, + rtx, + struct expr *); +static void eliminate_partially_redundant_loads (void); + + +/* Allocate memory for the CUID mapping array and register/memory + tracking tables. */ + +static void +alloc_mem (void) +{ + int i; + basic_block bb; + rtx insn; + + /* Find the largest UID and create a mapping from UIDs to CUIDs. */ + uid_cuid = xcalloc (get_max_uid () + 1, sizeof (int)); + i = 0; + FOR_EACH_BB (bb) + FOR_BB_INSNS (bb, insn) + { + if (INSN_P (insn)) + uid_cuid[INSN_UID (insn)] = i++; + else + uid_cuid[INSN_UID (insn)] = i; + } + + /* Allocate the available expressions hash table. 
We don't want to + make the hash table too small, but unnecessarily making it too large + also doesn't help. The i/4 is a gcse.c relic, and seems like a + reasonable choice. */ + expr_table = htab_create (MAX (i / 4, 13), + hash_expr_for_htab, expr_equiv_p, NULL); + + /* We allocate everything on obstacks because we often can roll back + the whole obstack to some point. Freeing obstacks is very fast. */ + gcc_obstack_init (&expr_obstack); + gcc_obstack_init (&occr_obstack); + gcc_obstack_init (&unoccr_obstack); + gcc_obstack_init (&modifies_mem_obstack); + + /* Working array used to track the last set for each register + in the current block. */ + reg_avail_info = (int *) xmalloc (FIRST_PSEUDO_REGISTER * sizeof (int)); + + /* Put a dummy modifies_mem object on the modifies_mem_obstack, so we + can roll it back in reset_opr_set_tables. */ + modifies_mem_obstack_bottom = + (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack, + sizeof (struct modifies_mem)); +} + +/* Free memory allocated by alloc_mem. */ + +static void +free_mem (void) +{ + free (uid_cuid); + + htab_delete (expr_table); + + obstack_free (&expr_obstack, NULL); + obstack_free (&occr_obstack, NULL); + obstack_free (&unoccr_obstack, NULL); + obstack_free (&modifies_mem_obstack, NULL); + + free (reg_avail_info); +} + + +/* Hash expression X. + DO_NOT_RECORD_P is a boolean indicating if a volatile operand is found + or if the expression contains something we don't want to insert in the + table. */ + +static hashval_t +hash_expr (rtx x, int *do_not_record_p) +{ + *do_not_record_p = 0; + return hash_rtx (x, GET_MODE (x), do_not_record_p, + NULL, /*have_reg_qty=*/false); +} + +/* Callback for hashtab. + Return the hash value for expression EXP. We don't actually hash + here, we just return the cached hash value. */ + +static hashval_t +hash_expr_for_htab (const void *expp) +{ + struct expr *exp = (struct expr *) expp; + return exp->hash; +} + +/* Callbach for hashtab. + Return nonzero if exp1 is equivalent to exp2. */ + +static int +expr_equiv_p (const void *exp1p, const void *exp2p) +{ + struct expr *exp1 = (struct expr *) exp1p; + struct expr *exp2 = (struct expr *) exp2p; + int equiv_p = exp_equiv_p (exp1->expr, exp2->expr, 0, true); + if (equiv_p + && exp1->hash != exp2->hash) + abort (); + return equiv_p; +} + + +/* Insert expression X in INSN in the hash TABLE. + If it is already present, record it as the last occurrence in INSN's + basic block. */ + +static void +insert_expr_in_table (rtx x, rtx insn) +{ + int do_not_record_p; + hashval_t hash; + struct expr *cur_expr, **slot; + struct occr *avail_occr, *last_occr = NULL; + + hash = hash_expr (x, &do_not_record_p); + + /* Do not insert expression in the table if it contains volatile operands, + or if hash_expr determines the expression is something we don't want + to or can't handle. */ + if (do_not_record_p) + return; + + /* We anticipate that redundant expressions are rare, so for convenience + allocate a new hash table element here already and set its fields. + If we don't do this, we need a hack with a static struct expr. Anyway, + obstack_free is really fast and one more obstack_alloc doesn't hurt if + we're going to see more expressions later on. */ + cur_expr = (struct expr *) obstack_alloc (&expr_obstack, + sizeof (struct expr)); + cur_expr->expr = x; + cur_expr->hash = hash; + cur_expr->avail_occr = NULL; + + slot = (struct expr **) htab_find_slot_with_hash (expr_table, cur_expr, + hash, INSERT); + + if (! 
(*slot)) + /* The expression isn't found, so insert it. */ + *slot = cur_expr; + else + { + /* The expression is already in the table, so roll back the + obstack and use the existing table entry. */ + obstack_free (&expr_obstack, cur_expr); + cur_expr = *slot; + } + + /* Search for another occurrence in the same basic block. */ + avail_occr = cur_expr->avail_occr; + while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn)) + { + /* If an occurrence isn't found, save a pointer to the end of + the list. */ + last_occr = avail_occr; + avail_occr = avail_occr->next; + } + + if (avail_occr) + /* Found another instance of the expression in the same basic block. + Prefer this occurrence to the currently recorded one. We want + the last one in the block and the block is scanned from start + to end. */ + avail_occr->insn = insn; + else + { + /* First occurrence of this expression in this basic block. */ + avail_occr = (struct occr *) obstack_alloc (&occr_obstack, + sizeof (struct occr)); + + /* First occurrence of this expression in any block? */ + if (cur_expr->avail_occr == NULL) + cur_expr->avail_occr = avail_occr; + else + last_occr->next = avail_occr; + + avail_occr->insn = insn; + avail_occr->next = NULL; + avail_occr->deleted_p = 0; + } +} + + +/* Lookup pattern PAT in the expression hash table. + The result is a pointer to the table entry, or NULL if not found. */ + +static struct expr * +lookup_expr_in_table (rtx pat) +{ + int do_not_record_p; + struct expr **slot, *tmp_expr; + hashval_t hash = hash_expr (pat, &do_not_record_p); + + if (do_not_record_p) + return NULL; + + tmp_expr = (struct expr *) obstack_alloc (&expr_obstack, + sizeof (struct expr)); + tmp_expr->expr = pat; + tmp_expr->hash = hash; + tmp_expr->avail_occr = NULL; + + slot = (struct expr **) htab_find_slot_with_hash (expr_table, tmp_expr, + hash, INSERT); + obstack_free (&expr_obstack, tmp_expr); + + if (!slot) + return NULL; + else + return (*slot); +} + + +/* Dump all expressions and occurences that are currently in the + expression hash table to FILE. */ + +/* This helper is called via htab_traverse. */ +static int +dump_hash_table_entry (void **slot, void *filep) +{ + struct expr *expr = (struct expr *) *slot; + FILE *file = (FILE *) filep; + struct occr *occr; + + fprintf (file, "expr: "); + print_rtl (file, expr->expr); + fprintf (file,"\nhashcode: %u\n", expr->hash); + fprintf (file,"list of occurences:\n"); + occr = expr->avail_occr; + while (occr) + { + rtx insn = occr->insn; + print_rtl_single (file, insn); + fprintf (file, "\n"); + occr = occr->next; + } + fprintf (file, "\n"); + return 1; +} + +static void +dump_hash_table (FILE *file) +{ + fprintf (file, "\n\nexpression hash table\n"); + fprintf (file, "size %ld, %ld elements, %f collision/search ratio\n", + (long) htab_size (expr_table), + (long) htab_elements (expr_table), + htab_collisions (expr_table)); + if (htab_elements (expr_table) > 0) + { + fprintf (file, "\n\ntable entries:\n"); + htab_traverse (expr_table, dump_hash_table_entry, file); + } + fprintf (file, "\n"); +} + + +/* Return nonzero if the operands of expression X are unchanged from the + start of INSN's basic block up to but not including INSN if AFTER_INSN + is false, or from INSN to the end of INSN's basic block if AFTER_INSN + is true. 
*/ + +static bool +oprs_unchanged_p (rtx x, rtx insn, bool after_insn) +{ + int i, j; + enum rtx_code code; + const char *fmt; + + if (x == 0) + return 1; + + code = GET_CODE (x); + switch (code) + { + case REG: +#ifdef ENABLE_CHECKING + /* We are called after register allocation. */ + if (REGNO (x) >= FIRST_PSEUDO_REGISTER) + abort (); +#endif + if (after_insn) + /* If the last CUID setting the insn is less than the CUID of + INSN, then reg X is not changed in or after INSN. */ + return reg_avail_info[REGNO (x)] < INSN_CUID (insn); + else + /* Reg X is not set before INSN in the current basic block if + we have not yet recorded the CUID of an insn that touches + the reg. */ + return reg_avail_info[REGNO (x)] == 0; + + case MEM: + if (load_killed_in_block_p (INSN_CUID (insn), x, after_insn)) + return 0; + else + return oprs_unchanged_p (XEXP (x, 0), insn, after_insn); + + case PC: + case CC0: /*FIXME*/ + case CONST: + case CONST_INT: + case CONST_DOUBLE: + case CONST_VECTOR: + case SYMBOL_REF: + case LABEL_REF: + case ADDR_VEC: + case ADDR_DIFF_VEC: + return 1; + + case PRE_DEC: + case PRE_INC: + case POST_DEC: + case POST_INC: + case PRE_MODIFY: + case POST_MODIFY: + if (after_insn) + return 0; + break; + + default: + break; + } + + for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) + { + if (fmt[i] == 'e') + { + if (! oprs_unchanged_p (XEXP (x, i), insn, after_insn)) + return 0; + } + else if (fmt[i] == 'E') + for (j = 0; j < XVECLEN (x, i); j++) + if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, after_insn)) + return 0; + } + + return 1; +} + + +/* Used for communication between find_mem_conflicts and + load_killed_in_block_p. Nonzero if find_mem_conflicts finds a + conflict between two memory references. + This is a bit of a hack to work around the limitations of note_stores. */ +static int mems_conflict_p; + +/* DEST is the output of an instruction. If it is a memory reference, and + possibly conflicts with the load found in DATA, then set mems_conflict_p + to a nonzero value. */ + +static void +find_mem_conflicts (rtx dest, rtx setter ATTRIBUTE_UNUSED, + void *data) +{ + rtx mem_op = (rtx) data; + + while (GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT + || GET_CODE (dest) == SIGN_EXTRACT + || GET_CODE (dest) == STRICT_LOW_PART) + dest = XEXP (dest, 0); + + /* If DEST is not a MEM, then it will not conflict with the load. Note + that function calls are assumed to clobber memory, but are handled + elsewhere. */ + if (! MEM_P (dest)) + return; + + if (true_dependence (dest, GET_MODE (dest), mem_op, + rtx_addr_varies_p)) + mems_conflict_p = 1; +} + + +/* Return nonzero if the expression in X (a memory reference) is killed + in block BB before if (AFTER_INSN is false) or after (if AFTER_INSN + is true) the insn with the CUID in UID_LIMIT. */ + +static int +load_killed_in_block_p (int uid_limit, rtx x, bool after_insn) +{ + struct modifies_mem *list_entry = modifies_mem_list; + + while (list_entry) + { + rtx setter = list_entry->insn; + + /* Ignore entries in the list that do not apply. */ + if ((after_insn + && INSN_CUID (setter) < uid_limit) + || (! after_insn + && INSN_CUID (setter) > uid_limit)) + { + list_entry = list_entry->next; + continue; + } + + /* If SETTER is a call everything is clobbered. Note that calls + to pure functions are never put on the list, so we need not + worry about them. */ + if (CALL_P (setter)) + return 1; + + /* SETTER must be an insn of some kind that sets memory. 
Call + note_stores to examine each hunk of memory that is modified. + It will set mems_conflict_p to nonzero if there may be a + conflict between X and SETTER. */ + mems_conflict_p = 0; + note_stores (PATTERN (setter), find_mem_conflicts, x); + if (mems_conflict_p) + return 1; + + list_entry = list_entry->next; + } + return 0; +} + + +/* Record register first/last/block set information for REGNO in INSN. */ + +static void +record_last_reg_set_info (rtx insn, int regno) +{ + reg_avail_info[regno] = INSN_CUID (insn); +} + + +/* Record memory modification information for INSN. We do not actually care + about the memory location(s) that are set, or even how they are set (consider + a CALL_INSN). We merely need to record which insns modify memory. */ + +static void +record_last_mem_set_info (rtx insn) +{ + struct modifies_mem *list_entry; + + list_entry = (struct modifies_mem *) obstack_alloc (&modifies_mem_obstack, + sizeof (struct modifies_mem)); + list_entry->insn = insn; + list_entry->next = modifies_mem_list; + modifies_mem_list = list_entry; +} + +/* Called from compute_hash_table via note_stores to handle one + SET or CLOBBER in an insn. DATA is really the instruction in which + the SET is taking place. */ + +static void +record_last_set_info (rtx dest, rtx setter ATTRIBUTE_UNUSED, void *data) +{ + rtx last_set_insn = (rtx) data; + + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + + if (REG_P (dest)) + record_last_reg_set_info (last_set_insn, REGNO (dest)); + else if (MEM_P (dest) + /* Ignore pushes, they clobber nothing. */ + && ! push_operand (dest, GET_MODE (dest))) + record_last_mem_set_info (last_set_insn); +} + + +/* Reset tables used to keep track of what's still available since the + start of the block. */ + +static void +reset_opr_set_tables (void) +{ + memset (reg_avail_info, 0, FIRST_PSEUDO_REGISTER * sizeof (int)); + obstack_free (&modifies_mem_obstack, modifies_mem_obstack_bottom); + modifies_mem_list = NULL; +} + +/* Mark things set by a CALL. */ + +static void +mark_call (rtx insn) +{ + if (! CONST_OR_PURE_CALL_P (insn)) + record_last_mem_set_info (insn); +} + +/* Mark things set by a SET. */ + +static void +mark_set (rtx pat, rtx insn) +{ + rtx dest = SET_DEST (pat); + + while (GET_CODE (dest) == SUBREG + || GET_CODE (dest) == ZERO_EXTRACT + || GET_CODE (dest) == SIGN_EXTRACT + || GET_CODE (dest) == STRICT_LOW_PART) + dest = XEXP (dest, 0); + + if (REG_P (dest)) + record_last_reg_set_info (insn, REGNO (dest)); + else if (MEM_P (dest)) + record_last_mem_set_info (insn); + + if (GET_CODE (SET_SRC (pat)) == CALL) + mark_call (insn); +} + +/* Record things set by a CLOBBER. */ + +static void +mark_clobber (rtx pat, rtx insn) +{ + rtx clob = XEXP (pat, 0); + + while (GET_CODE (clob) == SUBREG + || GET_CODE (clob) == STRICT_LOW_PART) + clob = XEXP (clob, 0); + + if (REG_P (clob)) + record_last_reg_set_info (insn, REGNO (clob)); + else + record_last_mem_set_info (insn); +} + +/* Record things set by INSN. + This data is used by oprs_unchanged_p. 
*/
+
+static void
+mark_oprs_set (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+  int i;
+
+  if (GET_CODE (pat) == SET)
+    mark_set (pat, insn);
+
+  else if (GET_CODE (pat) == PARALLEL)
+    for (i = 0; i < XVECLEN (pat, 0); i++)
+      {
+        rtx x = XVECEXP (pat, 0, i);
+
+        if (GET_CODE (x) == SET)
+          mark_set (x, insn);
+        else if (GET_CODE (x) == CLOBBER)
+          mark_clobber (x, insn);
+        else if (GET_CODE (x) == CALL)
+          mark_call (insn);
+      }
+
+  else if (GET_CODE (pat) == CLOBBER)
+    mark_clobber (pat, insn);
+
+  else if (GET_CODE (pat) == CALL)
+    mark_call (insn);
+}
+
+
+/* Scan the pattern of INSN and add an entry to the hash TABLE.
+   After reload we are interested in loads/stores only.  */
+
+static void
+hash_scan_set (rtx insn)
+{
+  rtx pat = PATTERN (insn);
+  rtx src = SET_SRC (pat);
+  rtx dest = SET_DEST (pat);
+
+  /* We are only interested in loads and stores.  */
+  if (! MEM_P (src) && ! MEM_P (dest))
+    return;
+
+  /* Don't mess with jumps and nops.  */
+  if (JUMP_P (insn) || set_noop_p (pat))
+    return;
+
+#ifdef ENABLE_CHECKING
+  /* We shouldn't have any EH_REGION notes post reload.  */
+  if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
+    abort ();
+#endif
+
+  if (REG_P (dest))
+    {
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+          can_copy_p (GET_MODE (dest))
+          /* Is SET_SRC something we want to gcse?  */
+          && general_operand (src, GET_MODE (src))
+          /* An expression is not available if its operands are
+             subsequently modified, including this insn.  */
+          && oprs_unchanged_p (src, insn, true))
+        {
+          insert_expr_in_table (src, insn);
+        }
+    }
+  else if (REG_P (src))
+    {
+      /* Only record sets of pseudo-regs in the hash table.  */
+      if (/* Don't GCSE something if we can't do a reg/reg copy.  */
+          can_copy_p (GET_MODE (src))
+          /* Is SET_DEST something we want to gcse?  */
+          && general_operand (dest, GET_MODE (dest))
+          && ! (flag_float_store && FLOAT_MODE_P (GET_MODE (dest)))
+          /* Check if the memory expression is killed after insn.  */
+          && ! load_killed_in_block_p (INSN_CUID (insn) + 1, dest, true)
+          && oprs_unchanged_p (XEXP (dest, 0), insn, true))
+        {
+          insert_expr_in_table (dest, insn);
+        }
+    }
+}
+
+/* Create hash table of memory expressions available at end of basic
+   blocks.  */
+
+static void
+compute_hash_table (void)
+{
+  basic_block bb;
+
+  FOR_EACH_BB (bb)
+    {
+      rtx insn;
+      unsigned int regno;
+
+      reset_opr_set_tables ();
+
+      /* First pass over the instructions records information used to
+         determine when registers and memory are first and last set.  */
+      FOR_BB_INSNS (bb, insn)
+        {
+          if (! INSN_P (insn))
+            continue;
+
+          if (CALL_P (insn))
+            {
+              bool clobbers_all = false;
+
+#ifdef NON_SAVING_SETJMP
+              if (NON_SAVING_SETJMP
+                  && find_reg_note (insn, REG_SETJMP, NULL_RTX))
+                clobbers_all = true;
+#endif
+
+              for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+                if (clobbers_all
+                    || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+                                          regno))
+                  record_last_reg_set_info (insn, regno);
+
+              if (! CONST_OR_PURE_CALL_P (insn))
+                record_last_mem_set_info (insn);
+            }
+
+          note_stores (PATTERN (insn), record_last_set_info, insn);
+
+          if (GET_CODE (PATTERN (insn)) == SET)
+            {
+              rtx src, dest;
+
+              src = SET_SRC (PATTERN (insn));
+              dest = SET_DEST (PATTERN (insn));
+              if (MEM_P (src) && auto_inc_p (XEXP (src, 0)))
+                {
+                  regno = REGNO (XEXP (XEXP (src, 0), 0));
+                  record_last_reg_set_info (insn, regno);
+                }
+              if (MEM_P (dest) && auto_inc_p (XEXP (dest, 0)))
+                {
+                  regno = REGNO (XEXP (XEXP (dest, 0), 0));
+                  record_last_reg_set_info (insn, regno);
+                }
+            }
+        }
+
+      /* The next pass builds the hash table.
*/ + FOR_BB_INSNS (bb, insn) + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == SET) + hash_scan_set (insn); + } +} + + +/* Check if register REG is killed in any insn waiting to be inserted on + edge E. This function is required to check that our data flow analysis + is still valid prior to commit_edge_insertions. */ + +static bool +reg_killed_on_edge (rtx reg, edge e) +{ + rtx insn; + + for (insn = e->insns.r; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn) && reg_set_p (reg, insn)) + return true; + + return false; +} + +/* Similar to above - check if register REG is used in any insn waiting + to be inserted on edge E. + Assumes no such insn can be a CALL_INSN; if so call reg_used_between_p + with PREV(insn),NEXT(insn) instead of calling reg_overlap_mentioned_p. */ + +static bool +reg_used_on_edge (rtx reg, edge e) +{ + rtx insn; + + for (insn = e->insns.r; insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn) && reg_overlap_mentioned_p (reg, PATTERN (insn))) + return true; + + return false; +} + + +/* Return the insn that sets register REG or clobbers it in between + FROM_INSN and TO_INSN (exclusive of those two). + Just like reg_set_between but for hard registers and not pseudos. */ + +static rtx +reg_set_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) +{ + rtx insn; + int regno; + +#ifdef ENABLE_CHECKING + /* We are called after register allocation. */ + if (!REG_P (reg) || REGNO (reg) >= FIRST_PSEUDO_REGISTER) + abort (); +#endif + + if (from_insn == to_insn) + return NULL_RTX; + + regno = REGNO (reg); + for (insn = NEXT_INSN (from_insn); + insn != to_insn; + insn = NEXT_INSN (insn)) + { + if (INSN_P (insn)) + { + if (FIND_REG_INC_NOTE (insn, reg) + || (CALL_P (insn) + && call_used_regs[regno]) + || find_reg_fusage (insn, CLOBBER, reg)) + return insn; + } + if (set_of (reg, insn) != NULL_RTX) + return insn; + } + + return NULL_RTX; +} + +/* Return the insn that uses register REG in between FROM_INSN and TO_INSN + (exclusive of those two). Similar to reg_used_between but for hard + registers and not pseudos. */ + +static rtx +reg_used_between_after_reload_p (rtx reg, rtx from_insn, rtx to_insn) +{ + rtx insn; + int regno; + +#ifdef ENABLE_CHECKING + /* We are called after register allocation. */ + if (!REG_P (reg) || REGNO (reg) >= FIRST_PSEUDO_REGISTER) + abort (); +#endif + + if (from_insn == to_insn) + return NULL_RTX; + + regno = REGNO (reg); + for (insn = NEXT_INSN (from_insn); + insn != to_insn; + insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && (reg_overlap_mentioned_p (reg, PATTERN (insn)) + || (CALL_P (insn) + && call_used_regs[regno]) + || find_reg_fusage (insn, USE, reg) + || find_reg_fusage (insn, CLOBBER, reg))) + return insn; + + return NULL_RTX; +} + +/* Return true if REG is used, set, or killed between the beginning of + basic block BB and UP_TO_INSN. Caches the result in reg_avail_info. */ + +static bool +reg_set_or_used_since_bb_start (rtx reg, basic_block bb, rtx up_to_insn) +{ + rtx insn, start = PREV_INSN (BB_HEAD (bb)); + + if (reg_avail_info[REGNO (reg)] != 0) + return true; + + insn = reg_used_between_after_reload_p (reg, start, up_to_insn); + if (! insn) + insn = reg_set_between_after_reload_p (reg, start, up_to_insn); + + if (insn) + reg_avail_info[REGNO (reg)] = INSN_CUID (insn); + + return insn != NULL_RTX; +} + +/* Return the loaded/stored register of a load/store instruction. */ + +static rtx +get_avail_load_store_reg (rtx insn) +{ + if (REG_P (SET_DEST (PATTERN (insn)))) /* A load. 
*/ + return SET_DEST(PATTERN(insn)); + if (REG_P (SET_SRC (PATTERN (insn)))) /* A store. */ + return SET_SRC (PATTERN (insn)); + abort (); +} + +/* Return nonzero if the predecessors of BB are "well behaved". */ + +static bool +bb_has_well_behaved_predecessors (basic_block bb) +{ + edge pred; + + if (! bb->pred) + return false; + + for (pred = bb->pred; pred != NULL; pred = pred->pred_next) + { + if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred)) + return false; + + if (JUMP_TABLE_DATA_P (BB_END (pred->src))) + return false; + } + return true; +} + + +/* Search for the occurrences of expression in BB. */ + +static struct occr* +get_bb_avail_insn (basic_block bb, struct occr *occr) +{ + for (; occr != NULL; occr = occr->next) + if (BLOCK_FOR_INSN (occr->insn) == bb) + return occr; + return NULL; +} + + +/* This handles the case where several stores feed a partially redundant + load. It checks if the redundancy elimination is possible and if it's + worth it. */ + +static void +eliminate_partially_redundant_load (basic_block bb, rtx insn, + struct expr *expr) +{ + edge pred; + rtx avail_insn = NULL_RTX; + rtx avail_reg; + rtx dest, pat; + struct occr *a_occr; + struct unoccr *occr, *avail_occrs = NULL; + struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL; + int npred_ok = 0; + gcov_type ok_count = 0; /* Redundant load execution count. */ + gcov_type critical_count = 0; /* Execution count of critical edges. */ + + /* The execution count of the loads to be added to make the + load fully redundant. */ + gcov_type not_ok_count = 0; + basic_block pred_bb; + + pat = PATTERN (insn); + dest = SET_DEST (pat); + + /* Check that the loaded register is not used, set, or killed from the + beginning of the block. */ + if (reg_set_or_used_since_bb_start (dest, bb, insn)) + return; + + /* Check potential for replacing load with copy for predecessors. */ + for (pred = bb->pred; pred; pred = pred->pred_next) + { + rtx next_pred_bb_end; + + avail_insn = NULL_RTX; + pred_bb = pred->src; + next_pred_bb_end = NEXT_INSN (BB_END (pred_bb)); + for (a_occr = get_bb_avail_insn (pred_bb, expr->avail_occr); a_occr; + a_occr = get_bb_avail_insn (pred_bb, a_occr->next)) + { + /* Check if the loaded register is not used. */ + avail_insn = a_occr->insn; + if (! (avail_reg = get_avail_load_store_reg (avail_insn))) + abort (); + /* Make sure we can generate a move from register avail_reg to + dest. */ + extract_insn (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg))); + if (! constrain_operands (1) + || reg_killed_on_edge (avail_reg, pred) + || reg_used_on_edge (dest, pred)) + { + avail_insn = NULL; + continue; + } + if (! reg_set_between_after_reload_p (avail_reg, avail_insn, + next_pred_bb_end)) + /* AVAIL_INSN remains non-null. */ + break; + else + avail_insn = NULL; + } + + if (EDGE_CRITICAL_P (pred)) + critical_count += pred->count; + + if (avail_insn != NULL_RTX) + { + npred_ok++; + ok_count += pred->count; + occr = (struct unoccr *) obstack_alloc (&unoccr_obstack, + sizeof (struct occr)); + occr->insn = avail_insn; + occr->pred = pred; + occr->next = avail_occrs; + avail_occrs = occr; + if (! rollback_unoccr) + rollback_unoccr = occr; + } + else + { + not_ok_count += pred->count; + unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack, + sizeof (struct unoccr)); + unoccr->insn = NULL_RTX; + unoccr->pred = pred; + unoccr->next = unavail_occrs; + unavail_occrs = unoccr; + if (! rollback_unoccr) + rollback_unoccr = unoccr; + } + } + + if (/* No load can be replaced by copy. 
*/ + npred_ok == 0 + /* Prevent exploding the code. */ + || (optimize_size && npred_ok > 1)) + goto cleanup; + + /* Check if it's worth applying the partial redundancy elimination. */ + if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count) + goto cleanup; + if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count) + goto cleanup; + + /* Generate moves to the loaded register from where + the memory is available. */ + for (occr = avail_occrs; occr; occr = occr->next) + { + avail_insn = occr->insn; + pred = occr->pred; + /* Set avail_reg to be the register having the value of the + memory. */ + avail_reg = get_avail_load_store_reg (avail_insn); + if (! avail_reg) + abort (); + + insert_insn_on_edge (gen_move_insn (copy_rtx (dest), + copy_rtx (avail_reg)), + pred); + stats.moves_inserted++; + + if (dump_file) + fprintf (dump_file, + "generating move from %d to %d on edge from %d to %d\n", + REGNO (avail_reg), + REGNO (dest), + pred->src->index, + pred->dest->index); + } + + /* Regenerate loads where the memory is unavailable. */ + for (unoccr = unavail_occrs; unoccr; unoccr = unoccr->next) + { + pred = unoccr->pred; + insert_insn_on_edge (copy_insn (PATTERN (insn)), pred); + stats.copies_inserted++; + + if (dump_file) + { + fprintf (dump_file, + "generating on edge from %d to %d a copy of load: ", + pred->src->index, + pred->dest->index); + print_rtl (dump_file, PATTERN (insn)); + fprintf (dump_file, "\n"); + } + } + + /* Delete the insn if it is not available in this block and mark it + for deletion if it is available. If insn is available it may help + discover additional redundancies, so mark it for later deletion. */ + for (a_occr = get_bb_avail_insn (bb, expr->avail_occr); + a_occr && (a_occr->insn != insn); + a_occr = get_bb_avail_insn (bb, a_occr->next)); + + if (!a_occr) + delete_insn (insn); + else + a_occr->deleted_p = 1; + +cleanup: + if (rollback_unoccr) + obstack_free (&unoccr_obstack, rollback_unoccr); +} + +/* Performing the redundancy elimination as described before. */ + +static void +eliminate_partially_redundant_loads (void) +{ + rtx insn; + basic_block bb; + + /* Note we start at block 1. */ + + if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR) + return; + + FOR_BB_BETWEEN (bb, + ENTRY_BLOCK_PTR->next_bb->next_bb, + EXIT_BLOCK_PTR, + next_bb) + { + if (! bb_has_well_behaved_predecessors (bb)) + continue; + + /* Do not try this optimization on cold basic blocks. */ + if (probably_cold_bb_p (bb)) + continue; + + reset_opr_set_tables (); + + FOR_BB_INSNS (bb, insn) + { + /* Is it a load - of the form (set (reg) (mem))? */ + if (NONJUMP_INSN_P (insn) + && GET_CODE (PATTERN (insn)) == SET + && REG_P (SET_DEST (PATTERN (insn))) + && MEM_P (SET_SRC (PATTERN (insn)))) + { + rtx pat = PATTERN (insn); + rtx src = SET_SRC (pat); + struct expr *expr; + + if (!MEM_VOLATILE_P (src) + && GET_MODE (src) != BLKmode + && general_operand (src, GET_MODE (src)) + /* Are the operands unchanged since the start of the + block? */ + && oprs_unchanged_p (src, insn, false) + && !(flag_non_call_exceptions && may_trap_p (src)) + && !side_effects_p (src) + /* Is the expression recorded? */ + && (expr = lookup_expr_in_table (src)) != NULL) + { + /* We now have a load (insn) and an available memory at + its BB start (expr). Try to remove the loads if it is + redundant. */ + eliminate_partially_redundant_load (bb, insn, expr); + } + } + + /* Keep track of everything modified by this insn. 
*/ + if (INSN_P (insn)) + mark_oprs_set (insn); + } + } + + commit_edge_insertions (); +} + +/* Go over the expression hash table and delete insns that were + marked for later deletion. */ + +/* This helper is called via htab_traverse. */ +static int +delete_redundant_insns_1 (void **slot, void *data ATTRIBUTE_UNUSED) +{ + struct expr *expr = (struct expr *) *slot; + struct occr *occr; + + for (occr = expr->avail_occr; occr != NULL; occr = occr->next) + { + if (occr->deleted_p) + { + delete_insn (occr->insn); + stats.insns_deleted++; + + if (dump_file) + { + fprintf (dump_file, "deleting insn:\n"); + print_rtl_single (dump_file, occr->insn); + fprintf (dump_file, "\n"); + } + } + } + + return 1; +} + +static void +delete_redundant_insns (void) +{ + htab_traverse (expr_table, delete_redundant_insns_1, NULL); + if (dump_file) + fprintf (dump_file, "\n"); +} + +/* Main entry point of the GCSE after reload - clean some redundant loads + due to spilling. */ + +void +gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED) +{ + memset (&stats, 0, sizeof (stats)); + + /* Allocate ememory for this pass. + Also computes and initializes the insns' CUIDs. */ + alloc_mem (); + + /* We need alias analysis. */ + init_alias_analysis (); + + compute_hash_table (); + + if (dump_file) + dump_hash_table (dump_file); + + if (htab_elements (expr_table) > 0) + { + eliminate_partially_redundant_loads (); + delete_redundant_insns (); + + if (dump_file) + { + fprintf (dump_file, "GCSE AFTER RELOAD stats:\n"); + fprintf (dump_file, "copies inserted: %d\n", stats.copies_inserted); + fprintf (dump_file, "moves inserted: %d\n", stats.moves_inserted); + fprintf (dump_file, "insns deleted: %d\n", stats.insns_deleted); + fprintf (dump_file, "\n\n"); + } + } + + /* We are finished with alias. */ + end_alias_analysis (); + + free_mem (); +} + diff --git a/gcc/rtl.h b/gcc/rtl.h index 0b75b99..cebc616 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -2119,6 +2119,8 @@ extern int rtx_to_tree_code (enum rtx_code); extern int delete_trivially_dead_insns (rtx, int); extern int cse_main (rtx, int, int, FILE *); extern void cse_condition_code_reg (void); +extern int exp_equiv_p (rtx, rtx, int, bool); +extern unsigned hash_rtx (rtx x, enum machine_mode, int *, int *, bool); /* In jump.c */ extern int comparison_dominates_p (enum rtx_code, enum rtx_code); @@ -2265,7 +2267,9 @@ extern bool can_copy_p (enum machine_mode); extern rtx fis_get_condition (rtx); extern int gcse_main (rtx, FILE *); extern int bypass_jumps (FILE *); -extern void gcse_after_reload_main (rtx, FILE *); + +/* In postreload-gcse.c */ +extern void gcse_after_reload_main (rtx); /* In global.c */ extern void mark_elimination (int, int); diff --git a/gcc/timevar.def b/gcc/timevar.def index 617f26b..14591b5 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -122,6 +122,7 @@ DEFTIMEVAR (TV_SCHED , "scheduling") DEFTIMEVAR (TV_LOCAL_ALLOC , "local alloc") DEFTIMEVAR (TV_GLOBAL_ALLOC , "global alloc") DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs") +DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload") DEFTIMEVAR (TV_FLOW2 , "flow 2") DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2") DEFTIMEVAR (TV_PEEPHOLE2 , "peephole 2") -- cgit v1.1
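
The cost test at the heart of eliminate_partially_redundant_load in the patch above is easy to lose in the surrounding RTL bookkeeping, so here is a minimal, self-contained sketch of just that decision.  The edge data in main() and the threshold values 3 and 10 are illustrative assumptions standing in for profile counts and for the GCSE_AFTER_RELOAD_PARTIAL_FRACTION / GCSE_AFTER_RELOAD_CRITICAL_FRACTION parameters the pass reads; nothing below is GCC API.

/* Standalone illustration only -- not part of the patch.  */
#include <stdbool.h>
#include <stdio.h>

typedef long long gcov_type;

struct pred_edge
{
  gcov_type count;   /* Execution count of the incoming edge.  */
  bool avail;        /* Is the loaded value already available here?  */
  bool critical;     /* Is this a critical edge?  */
};

/* Mirror of the profitability check: delete the load only if the
   executions on which it is redundant (ok_count) sufficiently outweigh
   both the compensation loads that would have to be inserted on the
   remaining edges (not_ok_count) and any insertions on critical edges.  */
static bool
worth_eliminating (const struct pred_edge *preds, int n_preds,
                   int partial_fraction, int critical_fraction)
{
  gcov_type ok_count = 0, not_ok_count = 0, critical_count = 0;
  int npred_ok = 0;

  for (int i = 0; i < n_preds; i++)
    {
      if (preds[i].critical)
        critical_count += preds[i].count;
      if (preds[i].avail)
        {
          npred_ok++;
          ok_count += preds[i].count;
        }
      else
        not_ok_count += preds[i].count;
    }

  if (npred_ok == 0)
    return false;
  if (ok_count < (gcov_type) partial_fraction * not_ok_count)
    return false;
  if (ok_count < (gcov_type) critical_fraction * critical_count)
    return false;
  return true;
}

int
main (void)
{
  /* One hot predecessor that already has the value in a register,
     one cold predecessor that would need a compensation load.  */
  struct pred_edge preds[] = {
    { 900, true,  false },
    {  10, false, false },
  };

  printf ("eliminate load? %s\n",
          worth_eliminating (preds, 2, 3, 10) ? "yes" : "no");
  return 0;
}

Without profile feedback the counts are static estimates rather than measured edge frequencies, which is also why the real pass additionally refuses to act under -Os when more than one predecessor would need a compensation copy.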