diff options
-rw-r--r-- | gcc/ChangeLog | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c | 20 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c | 19 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c | 24 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c | 22 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c | 25 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c | 2 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 430 |
13 files changed, 628 insertions, 58 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index d5f931d..10059b0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2010-07-28 Xinliang David Li <davidxl@google.com> + + * tree-ssa-loop-ivopts.c (avg_loop_niter): New function. + (dump_cand): Dump var_before/after. + (htab_inv_expr_eq): New function. + (htab_inv_expr_hash): New function. + (tree_ssa_iv_optimize_init): Support pseudo invariants. + (add_candidate_1): consider base type precision. + (set_use_iv_cost): New parameter. + (adjust_setup_cost): Use profile information. + (get_address_cost): Do not hard code width in computing address + offset limits. + (compare_aff_trees): New function. + (get_loop_invariant_expr_id): New function. + (get_computation_cost_at): New parameter and use profile information. + (get_computation_cost): New parameter. + (determine_use_iv_cost_generic): Pass new parameter. + (determine_use_iv_cost_address): Ditto. + (determine_use_iv_cost_condition): Ditto. + (autoinc_possible_for_pair): Ditto. + (determine_use_iv_costs): More dumps. + (iv_ca_get_num_inv_exprs): New function. + (iv_ca_recount_cost): Consider loop invariants in register pressure + cost. + (iv_ca_add_use): New parameter. + (iv_ca_dump): Better dumping. + (iv_ca_extend): New parameter. + (try_add_cand_for): Attempt to get better partial solution. + (try_improve_iv_set): Pass new parameter to iv_ca_extend. + (create_new-ivs): More dumps. + (rewrite_use_compare): Ditto. + (free_loop_data): More cleanup. + (treee_ssa_iv_optimize_finalize): Ditto. + 2010-07-28 Kai Tietz <kai.tietz@onevision.com> * config/i386/i386.h (MCOUNT_NAME_BEFORE_PROLOGUE): New. diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c new file mode 100644 index 0000000..60baa4b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_1.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */ +#define TYPE char* + +/* Testing that only one induction variable is selected after IVOPT on + the given target instead of 3. */ +void foo (int i_width, TYPE dst, TYPE src1, TYPE src2) +{ + int x; + for( x = 0; x < i_width; x++ ) + { + dst[x] = ( src1[x] + src2[x] + 1 ) >> 1; + } +} + + +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c new file mode 100644 index 0000000..ba87b50 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_2.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */ + +#define TYPE char* + +/* Testing on the given target, only one iv candidate instead of 3. */ +void foo (int i_width, TYPE dst, TYPE src1, TYPE src2) +{ + int x; + for( x = 0; x < i_width; x++ ) + { + *dst++ = ( *src1++ + *src2++ + 1 ) >> 1; + } +} + +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c new file mode 100644 index 0000000..ae4185a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_3.c @@ -0,0 +1,20 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */ + +#define TYPE char* + +/* Make sure only 1 iv candidate is selected after IVOPT. */ +void foo (int i_width, char* dst, char* src1, char* src2) +{ + int x; + for( x = 0; x < i_width; x++ ) + { + *((TYPE)dst) = ( *((TYPE)src1) + *((TYPE)src2) + 1 ) >> 1; + dst+=sizeof(TYPE); + src1+=sizeof(TYPE); + src2+=sizeof(TYPE); + } +} + +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c new file mode 100644 index 0000000..570664c9 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_4.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */ + +#ifndef TYPE +#define TYPE char* +#endif + +/* Make sure only 1 iv candidate is selected. */ +void foo (int i_width, TYPE dst, TYPE src1, TYPE src2) +{ + TYPE dstn= dst + i_width; + for( ; dst < dstn; ) + { + *dst++ = ( *src1++ + *src2++ + 1 ) >> 1; + } +} + +/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c new file mode 100644 index 0000000..076f5118e --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_1.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +#ifndef TYPE +#define TYPE char* +#endif + +int a[400]; + +/* Testing inferred loop iteration from array -- exit test can be replaced. */ +void foo (int i_width, TYPE dst, TYPE src1, TYPE src2) +{ + TYPE dstn= dst + i_width; + TYPE dst0 = dst; + unsigned long long i = 0; + for( ; dst <= dstn; ) + { + dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1; + dst++; + i += 16; + } +} + +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c new file mode 100644 index 0000000..4b7e197 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_infer_2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +#ifndef TYPE +#define TYPE char* +#endif + +extern int a[]; + +/* Can not infer loop iteration from array -- exit test can not be replaced. */ +void foo (int i_width, TYPE dst, TYPE src1, TYPE src2) +{ + TYPE dstn= dst + i_width; + TYPE dst0 = dst; + unsigned long long i = 0; + for( ; dst <= dstn; ) + { + dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1; + dst++; + i += 16; + } +} + +/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c new file mode 100644 index 0000000..4e19dfd --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_1.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +/* The test 'if (p2 > p_limit2)' can be replaced, so iv p2 can be + * eliminated. */ +long foo(long* p, long* p2, int N1, int N2) +{ + int i = 0; + long* p_limit = p + N1; + long* p_limit2 = p2 + N2; + long s = 0; + while (p <= p_limit) + { + p++; + p2++; + if (p2 > p_limit2) + break; + s += (*p); + } + return s; +} + +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c new file mode 100644 index 0000000..5e38df6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_2.c @@ -0,0 +1,25 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +/* Exit tests 'i < N1' and 'p2 > p_limit2' can be replaced, so + * two ivs i and p2 can be eliminate. */ +long foo(long* p, long* p2, int N1, int N2) +{ + int i = 0; + long* p_limit2 = p2 + N2; + long s = 0; + while (i < N1) + { + p++; + p2++; + i++; + if (p2 > p_limit2) + break; + s += (*p); + } + + return s; +} + +/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c new file mode 100644 index 0000000..dc78a43 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_3.c @@ -0,0 +1,22 @@ +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +/* iv p2 can be eliminated. */ +long foo(long* p, long* p2, int N1, int N2) +{ + unsigned long i = 0; + long* p_limit2 = p2 + N2; + long s = 0; + while (i < N1) + { + p2++; + i++; + if (p2 > p_limit2) + break; + s += p[i]; + } + return s; +} + +/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c new file mode 100644 index 0000000..d2aa78d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c @@ -0,0 +1,25 @@ + +/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */ +/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */ + +/* iv i's step 16 so its period is smaller than the max iterations + * i.e. replacing if (p2 > p_limit2) with testing of i may result in + * overflow. */ +long foo(long* p, long* p2, int N1, int N2) +{ + unsigned long i = 0; + long* p_limit2 = p2 + N2; + long s = 0; + while (i < N1) + { + p2++; + i += 16; + if (p2 > p_limit2) + break; + s += p[i]; + } + return s; +} + +/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */ +/* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c index 7c4236b..202ad1f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-3.c @@ -8,5 +8,5 @@ void main (void) f2 (); } -/* { dg-final { scan-tree-dump-times "!= 0" 4 "ivopts" } } */ +/* { dg-final { scan-tree-dump-times "!= 0" 5 "ivopts" } } */ /* { dg-final { cleanup-tree-dump "ivopts" } } */ diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index 478a019..519f66e 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -89,6 +89,7 @@ along with GCC; see the file COPYING3. If not see #include "langhooks.h" #include "tree-affine.h" #include "target.h" +#include "tree-inline.h" #include "tree-ssa-propagate.h" /* FIXME: Expressions are expanded to RTL in this pass to determine the @@ -99,10 +100,21 @@ along with GCC; see the file COPYING3. If not see /* The infinite cost. */ #define INFTY 10000000 -/* The expected number of loop iterations. TODO -- use profiling instead of - this. */ #define AVG_LOOP_NITER(LOOP) 5 +/* Returns the expected number of loop iterations for LOOP. + The average trip count is computed from profile data if it + exists. */ + +static inline HOST_WIDE_INT +avg_loop_niter (struct loop *loop) +{ + HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false); + if (niter == -1) + return AVG_LOOP_NITER (loop); + + return niter; +} /* Representation of the induction variable. */ struct iv @@ -158,6 +170,7 @@ struct cost_pair tree value; /* For final value elimination, the expression for the final value of the iv. For iv elimination, the new bound to compare with. */ + int inv_expr_id; /* Loop invariant expression id. */ }; /* Use. */ @@ -212,6 +225,14 @@ struct iv_cand biv. */ }; +/* Loop invariant expression hashtable entry. */ +struct iv_inv_expr_ent +{ + tree expr; + int id; + hashval_t hash; +}; + /* The data used by the induction variable optimizations. */ typedef struct iv_use *iv_use_p; @@ -239,6 +260,13 @@ struct ivopts_data /* The array of information for the ssa names. */ struct version_info *version_info; + /* The hashtable of loop invariant expressions created + by ivopt. */ + htab_t inv_expr_tab; + + /* Loop invariant expression id. */ + int inv_expr_id; + /* The bitmap of indices in version_info whose value was changed. */ bitmap relevant; @@ -520,6 +548,19 @@ dump_cand (FILE *file, struct iv_cand *cand) return; } + if (cand->var_before) + { + fprintf (file, " var_before "); + print_generic_expr (file, cand->var_before, TDF_SLIM); + fprintf (file, "\n"); + } + if (cand->var_after) + { + fprintf (file, " var_after "); + print_generic_expr (file, cand->var_after, TDF_SLIM); + fprintf (file, "\n"); + } + switch (cand->pos) { case IP_NORMAL: @@ -776,6 +817,29 @@ niter_for_single_dom_exit (struct ivopts_data *data) return niter_for_exit (data, exit, NULL); } +/* Hash table equality function for expressions. */ + +static int +htab_inv_expr_eq (const void *ent1, const void *ent2) +{ + const struct iv_inv_expr_ent *expr1 = + (const struct iv_inv_expr_ent *)ent1; + const struct iv_inv_expr_ent *expr2 = + (const struct iv_inv_expr_ent *)ent2; + + return operand_equal_p (expr1->expr, expr2->expr, 0); +} + +/* Hash function for loop invariant expressions. */ + +static hashval_t +htab_inv_expr_hash (const void *ent) +{ + const struct iv_inv_expr_ent *expr = + (const struct iv_inv_expr_ent *)ent; + return expr->hash; +} + /* Initializes data structures used by the iv optimization pass, stored in DATA. */ @@ -790,6 +854,9 @@ tree_ssa_iv_optimize_init (struct ivopts_data *data) data->niters = NULL; data->iv_uses = VEC_alloc (iv_use_p, heap, 20); data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20); + data->inv_expr_tab = htab_create (10, htab_inv_expr_hash, + htab_inv_expr_eq, free); + data->inv_expr_id = 0; decl_rtl_to_reset = VEC_alloc (tree, heap, 20); } @@ -1840,7 +1907,7 @@ find_interesting_uses_outside (struct ivopts_data *data, edge exit) phi = gsi_stmt (psi); def = PHI_ARG_DEF_FROM_EDGE (phi, exit); if (is_gimple_reg (def)) - find_interesting_uses_op (data, def); + find_interesting_uses_op (data, def); } } @@ -2157,7 +2224,9 @@ add_candidate_1 (struct ivopts_data *data, continue; if (operand_equal_p (base, cand->iv->base, 0) - && operand_equal_p (step, cand->iv->step, 0)) + && operand_equal_p (step, cand->iv->step, 0) + && (TYPE_PRECISION (TREE_TYPE (base)) + == TYPE_PRECISION (TREE_TYPE (cand->iv->base)))) break; } @@ -2571,7 +2640,8 @@ infinite_cost_p (comp_cost cost) static void set_use_iv_cost (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, - comp_cost cost, bitmap depends_on, tree value) + comp_cost cost, bitmap depends_on, tree value, + int inv_expr_id) { unsigned i, s; @@ -2587,6 +2657,7 @@ set_use_iv_cost (struct ivopts_data *data, use->cost_map[cand->id].cost = cost; use->cost_map[cand->id].depends_on = depends_on; use->cost_map[cand->id].value = value; + use->cost_map[cand->id].inv_expr_id = inv_expr_id; return; } @@ -2606,6 +2677,7 @@ found: use->cost_map[i].cost = cost; use->cost_map[i].depends_on = depends_on; use->cost_map[i].value = value; + use->cost_map[i].inv_expr_id = inv_expr_id; } /* Gets cost of (USE, CANDIDATE) pair. */ @@ -2956,7 +3028,7 @@ adjust_setup_cost (struct ivopts_data *data, unsigned cost) if (cost == INFTY) return cost; else if (optimize_loop_for_speed_p (data->current_loop)) - return cost / AVG_LOOP_NITER (data->current_loop); + return cost / avg_loop_niter (data->current_loop); else return cost; } @@ -3171,7 +3243,7 @@ get_address_cost (bool symbol_present, bool var_present, HOST_WIDE_INT i; HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT; HOST_WIDE_INT rat, off; - int old_cse_not_expected; + int old_cse_not_expected, width; unsigned sym_p, var_p, off_p, rat_p, add_c; rtx seq, addr, base; rtx reg0, reg1; @@ -3180,8 +3252,10 @@ get_address_cost (bool symbol_present, bool var_present, reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1); + width = (GET_MODE_BITSIZE (address_mode) < HOST_BITS_PER_WIDE_INT - 2) + ? GET_MODE_BITSIZE (address_mode) : HOST_BITS_PER_WIDE_INT - 2; addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX); - for (i = start; i <= 1 << 20; i <<= 1) + for (i = start; i <= 1ll << width; i <<= 1) { XEXP (addr, 1) = gen_int_mode (i, address_mode); if (!memory_address_addr_space_p (mem_mode, addr, as)) @@ -3190,7 +3264,7 @@ get_address_cost (bool symbol_present, bool var_present, data->max_offset = i == start ? 0 : i >> 1; off = data->max_offset; - for (i = start; i <= 1 << 20; i <<= 1) + for (i = start; i <= 1ll << width; i <<= 1) { XEXP (addr, 1) = gen_int_mode (-i, address_mode); if (!memory_address_addr_space_p (mem_mode, addr, as)) @@ -3201,12 +3275,12 @@ get_address_cost (bool symbol_present, bool var_present, if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "get_address_cost:\n"); - fprintf (dump_file, " min offset %s %d\n", + fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n", GET_MODE_NAME (mem_mode), - (int) data->min_offset); - fprintf (dump_file, " max offset %s %d\n", + data->min_offset); + fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n", GET_MODE_NAME (mem_mode), - (int) data->max_offset); + data->max_offset); } rat = 1; @@ -3717,6 +3791,144 @@ difference_cost (struct ivopts_data *data, return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on); } +/* Returns true if AFF1 and AFF2 are identical. */ + +static bool +compare_aff_trees (aff_tree *aff1, aff_tree *aff2) +{ + unsigned i; + + if (aff1->n != aff2->n) + return false; + + for (i = 0; i < aff1->n; i++) + { + if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0) + return false; + + if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0)) + return false; + } + return true; +} + +/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE + requires a new compiler generated temporary. Returns -1 otherwise. + ADDRESS_P is a flag indicating if the expression is for address + computation. */ + +static int +get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase, + tree cbase, HOST_WIDE_INT ratio, + bool address_p) +{ + aff_tree ubase_aff, cbase_aff; + tree expr, ub, cb; + struct iv_inv_expr_ent ent; + struct iv_inv_expr_ent **slot; + + STRIP_NOPS (ubase); + STRIP_NOPS (cbase); + ub = ubase; + cb = cbase; + + if ((TREE_CODE (ubase) == INTEGER_CST) + && (TREE_CODE (cbase) == INTEGER_CST)) + return -1; + + /* Strips the constant part. */ + if (TREE_CODE (ubase) == PLUS_EXPR + || TREE_CODE (ubase) == MINUS_EXPR + || TREE_CODE (ubase) == POINTER_PLUS_EXPR) + { + if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST) + ubase = TREE_OPERAND (ubase, 0); + } + + /* Strips the constant part. */ + if (TREE_CODE (cbase) == PLUS_EXPR + || TREE_CODE (cbase) == MINUS_EXPR + || TREE_CODE (cbase) == POINTER_PLUS_EXPR) + { + if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST) + cbase = TREE_OPERAND (cbase, 0); + } + + if (address_p) + { + if (((TREE_CODE (ubase) == SSA_NAME) + || (TREE_CODE (ubase) == ADDR_EXPR + && is_gimple_min_invariant (ubase))) + && (TREE_CODE (cbase) == INTEGER_CST)) + return -1; + + if (((TREE_CODE (cbase) == SSA_NAME) + || (TREE_CODE (cbase) == ADDR_EXPR + && is_gimple_min_invariant (cbase))) + && (TREE_CODE (ubase) == INTEGER_CST)) + return -1; + } + + if (ratio == 1) + { + if(operand_equal_p (ubase, cbase, 0)) + return -1; + + if (TREE_CODE (ubase) == ADDR_EXPR + && TREE_CODE (cbase) == ADDR_EXPR) + { + tree usym, csym; + + usym = TREE_OPERAND (ubase, 0); + csym = TREE_OPERAND (cbase, 0); + if (TREE_CODE (usym) == ARRAY_REF) + { + tree ind = TREE_OPERAND (usym, 1); + if (TREE_CODE (ind) == INTEGER_CST + && host_integerp (ind, 0) + && TREE_INT_CST_LOW (ind) == 0) + usym = TREE_OPERAND (usym, 0); + } + if (TREE_CODE (csym) == ARRAY_REF) + { + tree ind = TREE_OPERAND (csym, 1); + if (TREE_CODE (ind) == INTEGER_CST + && host_integerp (ind, 0) + && TREE_INT_CST_LOW (ind) == 0) + csym = TREE_OPERAND (csym, 0); + } + if (operand_equal_p (usym, csym, 0)) + return -1; + } + /* Now do more complex comparison */ + tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff); + tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff); + if (compare_aff_trees (&ubase_aff, &cbase_aff)) + return -1; + } + + tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff); + tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff); + + aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio)); + aff_combination_add (&ubase_aff, &cbase_aff); + expr = aff_combination_to_tree (&ubase_aff); + ent.expr = expr; + ent.hash = iterative_hash_expr (expr, 0); + slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab, + &ent, INSERT); + if (*slot) + return (*slot)->id; + + *slot = XNEW (struct iv_inv_expr_ent); + (*slot)->expr = expr; + (*slot)->hash = ent.hash; + (*slot)->id = data->inv_expr_id++; + return (*slot)->id; +} + + + /* Determines the cost of the computation by that USE is expressed from induction variable CAND. If ADDRESS_P is true, we just need to create an address from it, otherwise we want to get it into @@ -3729,7 +3941,8 @@ static comp_cost get_computation_cost_at (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, bool address_p, bitmap *depends_on, gimple at, - bool *can_autoinc) + bool *can_autoinc, + int *inv_expr_id) { tree ubase = use->iv->base, ustep = use->iv->step; tree cbase, cstep; @@ -3812,6 +4025,7 @@ get_computation_cost_at (struct ivopts_data *data, ubase, build_int_cst (utype, 0), &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else if (ratio == 1) { @@ -3819,6 +4033,7 @@ get_computation_cost_at (struct ivopts_data *data, ubase, cbase, &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else if (address_p && !POINTER_TYPE_P (ctype) @@ -3832,16 +4047,27 @@ get_computation_cost_at (struct ivopts_data *data, ubase, cbase, &symbol_present, &var_present, &offset, depends_on); + cost.cost /= avg_loop_niter (data->current_loop); } else { cost = force_var_cost (data, cbase, depends_on); - cost.cost += add_cost (TYPE_MODE (ctype), data->speed); cost = add_costs (cost, difference_cost (data, ubase, build_int_cst (utype, 0), &symbol_present, &var_present, &offset, depends_on)); + cost.cost /= avg_loop_niter (data->current_loop); + cost.cost += add_cost (TYPE_MODE (ctype), data->speed); + } + + if (inv_expr_id) + { + *inv_expr_id = + get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p); + /* Clear depends on. */ + if (*inv_expr_id != -1 && depends_on && *depends_on) + bitmap_clear (*depends_on); } /* If we are after the increment, the value of the candidate is higher by @@ -3916,11 +4142,12 @@ fallback: static comp_cost get_computation_cost (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand, - bool address_p, bitmap *depends_on, bool *can_autoinc) + bool address_p, bitmap *depends_on, + bool *can_autoinc, int *inv_expr_id) { return get_computation_cost_at (data, use, cand, address_p, depends_on, use->stmt, - can_autoinc); + can_autoinc, inv_expr_id); } /* Determines cost of basing replacement of USE on CAND in a generic @@ -3932,6 +4159,7 @@ determine_use_iv_cost_generic (struct ivopts_data *data, { bitmap depends_on; comp_cost cost; + int inv_expr_id = -1; /* The simple case first -- if we need to express value of the preserved original biv, the cost is 0. This also prevents us from counting the @@ -3940,12 +4168,15 @@ determine_use_iv_cost_generic (struct ivopts_data *data, if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt) { - set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE); + set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1); return true; } - cost = get_computation_cost (data, use, cand, false, &depends_on, NULL); - set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); + cost = get_computation_cost (data, use, cand, false, &depends_on, + NULL, &inv_expr_id); + + set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, + inv_expr_id); return !infinite_cost_p (cost); } @@ -3958,8 +4189,9 @@ determine_use_iv_cost_address (struct ivopts_data *data, { bitmap depends_on; bool can_autoinc; + int inv_expr_id = -1; comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on, - &can_autoinc); + &can_autoinc, &inv_expr_id); if (cand->ainc_use == use) { @@ -3971,7 +4203,8 @@ determine_use_iv_cost_address (struct ivopts_data *data, else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE) cost = infinite_cost; } - set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); + set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, + inv_expr_id); return !infinite_cost_p (cost); } @@ -4151,12 +4384,13 @@ determine_use_iv_cost_condition (struct ivopts_data *data, bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on; comp_cost elim_cost, express_cost, cost; bool ok; + int inv_expr_id = -1; tree *control_var, *bound_cst; /* Only consider real candidates. */ if (!cand->iv) { - set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE); + set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1); return false; } @@ -4190,7 +4424,8 @@ determine_use_iv_cost_condition (struct ivopts_data *data, elim_cost.cost -= 1; express_cost = get_computation_cost (data, use, cand, false, - &depends_on_express, NULL); + &depends_on_express, NULL, + &inv_expr_id); fd_ivopts_data = data; walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL); @@ -4209,7 +4444,7 @@ determine_use_iv_cost_condition (struct ivopts_data *data, bound = NULL_TREE; } - set_use_iv_cost (data, use, cand, cost, depends_on, bound); + set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id); if (depends_on_elim) BITMAP_FREE (depends_on_elim); @@ -4257,7 +4492,7 @@ autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use, return false; cost = get_computation_cost (data, use, cand, true, &depends_on, - &can_autoinc); + &can_autoinc, NULL); BITMAP_FREE (depends_on); @@ -4381,6 +4616,8 @@ determine_use_iv_costs (struct ivopts_data *data) if (use->cost_map[j].depends_on) bitmap_print (dump_file, use->cost_map[j].depends_on, "",""); + if (use->cost_map[j].inv_expr_id != -1) + fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id); fprintf (dump_file, "\n"); } @@ -4556,14 +4793,54 @@ cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b) return false; } + +/* Returns candidate by that USE is expressed in IVS. */ + +static struct cost_pair * +iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) +{ + return ivs->cand_for_use[use->id]; +} + + +/* Returns the number of temps needed for new loop invariant + expressions. */ + +static int +iv_ca_get_num_inv_exprs (struct ivopts_data *data, struct iv_ca *ivs) +{ + unsigned i, n = 0; + unsigned *used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1); + + for (i = 0; i < ivs->upto; i++) + { + struct iv_use *use = iv_use (data, i); + struct cost_pair *cp = iv_ca_cand_for_use (ivs, use); + if (cp && cp->inv_expr_id != -1) + { + used_inv_expr[cp->inv_expr_id]++; + if (used_inv_expr[cp->inv_expr_id] == 1) + n++; + } + } + + free (used_inv_expr); + return n; +} + /* Computes the cost field of IVS structure. */ static void iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs) { + unsigned n_inv_exprs = 0; comp_cost cost = ivs->cand_use_cost; + cost.cost += ivs->cand_cost; - cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs); + + n_inv_exprs = iv_ca_get_num_inv_exprs (data, ivs); + cost.cost += ivopts_global_cost_for_size (data, + ivs->n_regs + n_inv_exprs); ivs->cost = cost; } @@ -4583,7 +4860,7 @@ iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs) { ivs->n_invariant_uses[iid]--; if (ivs->n_invariant_uses[iid] == 0) - ivs->n_regs--; + ivs->n_regs--; } } @@ -4638,7 +4915,7 @@ iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs) { ivs->n_invariant_uses[iid]++; if (ivs->n_invariant_uses[iid] == 1) - ivs->n_regs++; + ivs->n_regs++; } } @@ -4682,14 +4959,16 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs, } /* Extend set IVS by expressing USE by some of the candidates in it - if possible. */ + if possible. All important candidates will be considered + if IMPORTANT_CANDIDATES is true. */ static void iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, - struct iv_use *use) + struct iv_use *use, bool important_candidates) { struct cost_pair *best_cp = NULL, *cp; bitmap_iterator bi; + bitmap cands; unsigned i; gcc_assert (ivs->upto >= use->id); @@ -4700,9 +4979,12 @@ iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs, ivs->bad_uses++; } - EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi) + cands = (important_candidates ? data->important_candidates : ivs->cands); + EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi) { - cp = get_use_iv_cost (data, use, iv_cand (data, i)); + struct iv_cand *cand = iv_cand (data, i); + + cp = get_use_iv_cost (data, use, cand); if (cheaper_cost_pair (cp, best_cp)) best_cp = cp; @@ -4782,14 +5064,6 @@ iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) return l1; } -/* Returns candidate by that USE is expressed in IVS. */ - -static struct cost_pair * -iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) -{ - return ivs->cand_for_use[use->id]; -} - /* Reverse the list of changes DELTA, forming the inverse to it. */ static struct iv_ca_delta * @@ -4913,8 +5187,21 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) unsigned i; comp_cost cost = iv_ca_cost (ivs); - fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity); - bitmap_print (file, ivs->cands, " candidates ","\n"); + fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity); + fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n", + ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); + bitmap_print (file, ivs->cands, " candidates: ","\n"); + + for (i = 0; i < ivs->upto; i++) + { + struct iv_use *use = iv_use (data, i); + struct cost_pair *cp = iv_ca_cand_for_use (ivs, use); + if (cp) + fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n", + use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity); + else + fprintf (file, " use:%d --> ??\n", use->id); + } for (i = 1; i <= data->max_inv_id; i++) if (ivs->n_invariant_uses[i]) @@ -4922,17 +5209,18 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) fprintf (file, "%s%d", pref, i); pref = ", "; } - fprintf (file, "\n"); + fprintf (file, "\n\n"); } /* Try changing candidate in IVS to CAND for each use. Return cost of the new set, and store differences in DELTA. Number of induction variables - in the new set is stored to N_IVS. */ + in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true + the function will try to find a solution with mimimal iv candidates. */ static comp_cost iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, struct iv_cand *cand, struct iv_ca_delta **delta, - unsigned *n_ivs) + unsigned *n_ivs, bool min_ncand) { unsigned i; comp_cost cost; @@ -4953,11 +5241,11 @@ iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, if (!new_cp) continue; - if (!iv_ca_has_deps (ivs, new_cp)) + if (!min_ncand && !iv_ca_has_deps (ivs, new_cp)) continue; - if (!cheaper_cost_pair (new_cp, old_cp)) - continue; + if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp)) + continue; *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); } @@ -5008,8 +5296,9 @@ iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs, cp = get_use_iv_cost (data, use, cnd); if (!cp) continue; + if (!iv_ca_has_deps (ivs, cp)) - continue; + continue; if (!cheaper_cost_pair (cp, new_cp)) continue; @@ -5121,10 +5410,18 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, struct iv_ca_delta *best_delta = NULL, *act_delta; struct cost_pair *cp; - iv_ca_add_use (data, ivs, use); + iv_ca_add_use (data, ivs, use, false); best_cost = iv_ca_cost (ivs); cp = iv_ca_cand_for_use (ivs, use); + if (!cp) + { + ivs->upto--; + ivs->bad_uses--; + iv_ca_add_use (data, ivs, use, true); + best_cost = iv_ca_cost (ivs); + cp = iv_ca_cand_for_use (ivs, use); + } if (cp) { best_delta = iv_ca_delta_add (use, NULL, cp, NULL); @@ -5151,14 +5448,15 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, continue; if (iv_ca_cand_used_p (ivs, cand)) - continue; + continue; cp = get_use_iv_cost (data, use, cand); if (!cp) continue; iv_ca_set_cp (data, ivs, use, cp); - act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL); + act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, + true); iv_ca_set_no_cp (data, ivs, use); act_delta = iv_ca_delta_add (use, NULL, cp, act_delta); @@ -5196,7 +5494,7 @@ try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs, act_delta = NULL; iv_ca_set_cp (data, ivs, use, cp); - act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL); + act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true); iv_ca_set_no_cp (data, ivs, use); act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use), cp, act_delta); @@ -5256,7 +5554,7 @@ try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs) if (iv_ca_cand_used_p (ivs, cand)) continue; - acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs); + acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false); if (!act_delta) continue; @@ -5416,7 +5714,6 @@ create_new_iv (struct ivopts_data *data, struct iv_cand *cand) /* Rewrite the increment so that it uses var_before directly. */ find_interesting_uses_op (data, cand->var_after)->selected = cand; - return; } @@ -5444,8 +5741,18 @@ create_new_ivs (struct ivopts_data *data, struct iv_ca *set) cand = iv_cand (data, i); create_new_iv (data, cand); } -} + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nSelected IV set: \n"); + EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi) + { + cand = iv_cand (data, i); + dump_cand (dump_file, cand); + } + fprintf (dump_file, "\n"); + } +} /* Rewrites USE (definition of iv used in a nonlinear expression) using candidate CAND. */ @@ -5795,6 +6102,11 @@ rewrite_use_compare (struct ivopts_data *data, tree var_type = TREE_TYPE (var); gimple_seq stmts; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Replacing exit test: "); + print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM); + } compare = iv_elimination_compare (data, use); bound = unshare_expr (fold_convert (var_type, bound)); op = force_gimple_operand (bound, &stmts, true, NULL_TREE); @@ -5979,6 +6291,9 @@ free_loop_data (struct ivopts_data *data) SET_DECL_RTL (obj, NULL_RTX); VEC_truncate (tree, decl_rtl_to_reset, 0); + + htab_empty (data->inv_expr_tab); + data->inv_expr_id = 0; } /* Finalizes data structures used by the iv optimization pass. LOOPS is the @@ -5995,6 +6310,7 @@ tree_ssa_iv_optimize_finalize (struct ivopts_data *data) VEC_free (tree, heap, decl_rtl_to_reset); VEC_free (iv_use_p, heap, data->iv_uses); VEC_free (iv_cand_p, heap, data->iv_candidates); + htab_delete (data->inv_expr_tab); } /* Returns true if the loop body BODY includes any function calls. */ |