diff options
author | Zdenek Dvorak <dvorakz@suse.cz> | 2004-10-27 22:27:20 +0200 |
---|---|---|
committer | Zdenek Dvorak <rakdver@gcc.gnu.org> | 2004-10-27 20:27:20 +0000 |
commit | 38b0dcb81effec70e61bb1e652c0cab254245115 (patch) | |
tree | 36323e61beca852219425c346a9efad5a6b0dfb5 /gcc | |
parent | 89e73849fd8fe510b1e5b8b91b5ce1211d377f95 (diff) | |
download | gcc-38b0dcb81effec70e61bb1e652c0cab254245115.zip gcc-38b0dcb81effec70e61bb1e652c0cab254245115.tar.gz gcc-38b0dcb81effec70e61bb1e652c0cab254245115.tar.bz2 |
re PR tree-optimization/18048 (mgrid loop performance regression with ivopts (register pressure))
PR tree-optimization/18048
* fold-const.c (try_move_mult_to_index): New function.
(fold): Use try_move_mult_to_index.
* tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
* tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
an all-ones unsigned constant without extra bits.
* tree.c (build_low_bits_mask): New function.
* tree.h (build_low_bits_mask): Declare.
From-SVN: r89708
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/fold-const.c | 107 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-ivopts.c | 53 | ||||
-rw-r--r-- | gcc/tree-ssa-loop-niter.c | 7 | ||||
-rw-r--r-- | gcc/tree.c | 34 | ||||
-rw-r--r-- | gcc/tree.h | 1 |
6 files changed, 204 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 96089fb..e517dd3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2004-10-27 Zdenek Dvorak <dvorakz@suse.cz> + + PR tree-optimization/18048 + * fold-const.c (try_move_mult_to_index): New function. + (fold): Use try_move_mult_to_index. + * tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates. + * tree-ssa-loop-niter.c (number_of_iterations_cond): Produce + an all-ones unsigned constant without extra bits. + * tree.c (build_low_bits_mask): New function. + * tree.h (build_low_bits_mask): Declare. + 2004-10-27 David Edelsohn <edelsohn@gnu.org> PR target/17956 diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 1dba1fb..00892f4 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -5967,6 +5967,84 @@ tree_swap_operands_p (tree arg0, tree arg1, bool reorder) return 0; } +/* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is + step of the array. TYPE is the type of the expression. ADDR is the address. + MULT is the multiplicative expression. If the function succeeds, the new + address expression is returned. Otherwise NULL_TREE is returned. */ + +static tree +try_move_mult_to_index (tree type, enum tree_code code, tree addr, tree mult) +{ + tree s, delta, step; + tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1); + tree ref = TREE_OPERAND (addr, 0), pref; + tree ret, pos; + tree itype; + + STRIP_NOPS (arg0); + STRIP_NOPS (arg1); + + if (TREE_CODE (arg0) == INTEGER_CST) + { + s = arg0; + delta = arg1; + } + else if (TREE_CODE (arg1) == INTEGER_CST) + { + s = arg1; + delta = arg0; + } + else + return NULL_TREE; + + for (;; ref = TREE_OPERAND (ref, 0)) + { + if (TREE_CODE (ref) == ARRAY_REF) + { + step = array_ref_element_size (ref); + + if (TREE_CODE (step) != INTEGER_CST) + continue; + + itype = TREE_TYPE (step); + + /* If the type sizes do not match, we might run into problems + when one of them would overflow. */ + if (TYPE_PRECISION (itype) != TYPE_PRECISION (type)) + continue; + + if (!operand_equal_p (step, fold_convert (itype, s), 0)) + continue; + + delta = fold_convert (itype, delta); + break; + } + + if (!handled_component_p (ref)) + return NULL_TREE; + } + + /* We found the suitable array reference. So copy everything up to it, + and replace the index. */ + + pref = TREE_OPERAND (addr, 0); + ret = copy_node (pref); + pos = ret; + + while (pref != ref) + { + pref = TREE_OPERAND (pref, 0); + TREE_OPERAND (pos, 0) = copy_node (pref); + pos = TREE_OPERAND (pos, 0); + } + + TREE_OPERAND (pos, 1) = fold (build2 (code, itype, + TREE_OPERAND (pos, 1), + delta)); + + return build1 (ADDR_EXPR, type, ret); +} + /* Perform constant folding and related simplification of EXPR. The related simplifications include x*1 => x, x*0 => 0, etc., and application of the associative law. @@ -6602,6 +6680,24 @@ fold (tree expr) alt0, alt1)), same)); } + + /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is step + of the array. Loop optimizer sometimes produce this type of + expressions. */ + if (TREE_CODE (arg0) == ADDR_EXPR + && TREE_CODE (arg1) == MULT_EXPR) + { + tem = try_move_mult_to_index (type, PLUS_EXPR, arg0, arg1); + if (tem) + return fold (tem); + } + else if (TREE_CODE (arg1) == ADDR_EXPR + && TREE_CODE (arg0) == MULT_EXPR) + { + tem = try_move_mult_to_index (type, PLUS_EXPR, arg1, arg0); + if (tem) + return fold (tem); + } } else { @@ -6974,6 +7070,17 @@ fold (tree expr) &diff)) return build_int_cst_type (type, diff); } + + /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is step + of the array. Loop optimizer sometimes produce this type of + expressions. */ + if (TREE_CODE (arg0) == ADDR_EXPR + && TREE_CODE (arg1) == MULT_EXPR) + { + tem = try_move_mult_to_index (type, MINUS_EXPR, arg0, arg1); + if (tem) + return fold (tem); + } if (TREE_CODE (arg0) == MULT_EXPR && TREE_CODE (arg1) == MULT_EXPR diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c index a620aca..c0c800a7 100644 --- a/gcc/tree-ssa-loop-ivopts.c +++ b/gcc/tree-ssa-loop-ivopts.c @@ -3603,20 +3603,32 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv, bitmap act_inv = BITMAP_XMALLOC (); unsigned i; struct cost_pair *cp; + bitmap_iterator bi; + struct iv_cand *cand; + bitmap depends_on; bitmap_copy (best_ivs, ivs); bitmap_copy (best_inv, inv); - for (i = 0; i < use->n_map_members; i++) + /* First try important candidates. Only if it fails, try the specific ones. + Rationale -- in loops with many variables the best choice often is to use + just one generic biv. If we added here many ivs specific to the uses, + the optimization algorithm later would be likely to get stuck in a local + minimum, thus causing us to create too many ivs. The approach from + few ivs to more seems more likely to be succesful -- starting from few + ivs, replacing an expensive use by a specific iv should always be a + win. */ + EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi) { - cp = use->cost_map + i; - if (cp->cost == INFTY) + cand = iv_cand (data, i); + + if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY) continue; bitmap_copy (act_ivs, ivs); - bitmap_set_bit (act_ivs, cp->cand->id); - if (cp->depends_on) - bitmap_a_or_b (act_inv, inv, cp->depends_on); + bitmap_set_bit (act_ivs, cand->id); + if (depends_on) + bitmap_a_or_b (act_inv, inv, depends_on); else bitmap_copy (act_inv, inv); act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1); @@ -3629,6 +3641,35 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv, } } + if (best_cost == INFTY) + { + for (i = 0; i < use->n_map_members; i++) + { + cp = use->cost_map + i; + if (cp->cost == INFTY) + continue; + + /* Already tried this. */ + if (cp->cand->important) + continue; + + bitmap_copy (act_ivs, ivs); + bitmap_set_bit (act_ivs, cp->cand->id); + if (cp->depends_on) + bitmap_a_or_b (act_inv, inv, cp->depends_on); + else + bitmap_copy (act_inv, inv); + act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1); + + if (act_cost < best_cost) + { + best_cost = act_cost; + bitmap_copy (best_ivs, act_ivs); + bitmap_copy (best_inv, act_inv); + } + } + } + bitmap_copy (ivs, best_ivs); bitmap_copy (inv, best_inv); diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c index b879b9e..496b3f3 100644 --- a/gcc/tree-ssa-loop-niter.c +++ b/gcc/tree-ssa-loop-niter.c @@ -419,9 +419,10 @@ number_of_iterations_cond (tree type, tree base0, tree step0, d = EXEC_BINARY (LSHIFT_EXPR, niter_type, build_int_cst_type (niter_type, 1), bits); s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits); - bound = EXEC_BINARY (RSHIFT_EXPR, niter_type, - build_int_cst (niter_type, -1), - bits); + + bound = build_low_bits_mask (niter_type, + (TYPE_PRECISION (niter_type) + - tree_low_cst (bits, 1))); assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d)); assumption = fold (build2 (EQ_EXPR, boolean_type_node, @@ -609,6 +609,40 @@ build_int_cst_wide (tree type, unsigned HOST_WIDE_INT low, HOST_WIDE_INT hi) return t; } +/* Builds an integer constant in TYPE such that lowest BITS bits are ones + and the rest are zeros. */ + +tree +build_low_bits_mask (tree type, unsigned bits) +{ + unsigned HOST_WIDE_INT low; + HOST_WIDE_INT high; + unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0; + + gcc_assert (bits <= TYPE_PRECISION (type)); + + if (bits == TYPE_PRECISION (type) + && !TYPE_UNSIGNED (type)) + { + /* Sign extended all-ones mask. */ + low = all_ones; + high = -1; + } + else if (bits <= HOST_BITS_PER_WIDE_INT) + { + low = all_ones >> (HOST_BITS_PER_WIDE_INT - bits); + high = 0; + } + else + { + bits -= HOST_BITS_PER_WIDE_INT; + low = all_ones; + high = all_ones >> (HOST_BITS_PER_WIDE_INT - bits); + } + + return build_int_cst_wide (type, low, high); +} + /* Checks that X is integer constant that can be expressed in (unsigned) HOST_WIDE_INT without loss of precision. */ @@ -3543,6 +3543,7 @@ tree fold_build_cleanup_point_expr (tree type, tree expr); extern tree build_fold_addr_expr_with_type (tree, tree); extern tree build_fold_indirect_ref (tree); extern tree constant_boolean_node (int, tree); +extern tree build_low_bits_mask (tree, unsigned); extern bool tree_swap_operands_p (tree, tree, bool); extern enum tree_code swap_tree_comparison (enum tree_code); |