aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorZdenek Dvorak <dvorakz@suse.cz>2004-10-27 22:27:20 +0200
committerZdenek Dvorak <rakdver@gcc.gnu.org>2004-10-27 20:27:20 +0000
commit38b0dcb81effec70e61bb1e652c0cab254245115 (patch)
tree36323e61beca852219425c346a9efad5a6b0dfb5 /gcc
parent89e73849fd8fe510b1e5b8b91b5ce1211d377f95 (diff)
downloadgcc-38b0dcb81effec70e61bb1e652c0cab254245115.zip
gcc-38b0dcb81effec70e61bb1e652c0cab254245115.tar.gz
gcc-38b0dcb81effec70e61bb1e652c0cab254245115.tar.bz2
re PR tree-optimization/18048 (mgrid loop performance regression with ivopts (register pressure))
PR tree-optimization/18048 * fold-const.c (try_move_mult_to_index): New function. (fold): Use try_move_mult_to_index. * tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates. * tree-ssa-loop-niter.c (number_of_iterations_cond): Produce an all-ones unsigned constant without extra bits. * tree.c (build_low_bits_mask): New function. * tree.h (build_low_bits_mask): Declare. From-SVN: r89708
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog11
-rw-r--r--gcc/fold-const.c107
-rw-r--r--gcc/tree-ssa-loop-ivopts.c53
-rw-r--r--gcc/tree-ssa-loop-niter.c7
-rw-r--r--gcc/tree.c34
-rw-r--r--gcc/tree.h1
6 files changed, 204 insertions, 9 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 96089fb..e517dd3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2004-10-27 Zdenek Dvorak <dvorakz@suse.cz>
+
+ PR tree-optimization/18048
+ * fold-const.c (try_move_mult_to_index): New function.
+ (fold): Use try_move_mult_to_index.
+ * tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
+ * tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
+ an all-ones unsigned constant without extra bits.
+ * tree.c (build_low_bits_mask): New function.
+ * tree.h (build_low_bits_mask): Declare.
+
2004-10-27 David Edelsohn <edelsohn@gnu.org>
PR target/17956
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 1dba1fb..00892f4 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -5967,6 +5967,84 @@ tree_swap_operands_p (tree arg0, tree arg1, bool reorder)
return 0;
}
+/* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is
+ step of the array. TYPE is the type of the expression. ADDR is the address.
+ MULT is the multiplicative expression. If the function succeeds, the new
+ address expression is returned. Otherwise NULL_TREE is returned. */
+
+static tree
+try_move_mult_to_index (tree type, enum tree_code code, tree addr, tree mult)
+{
+ tree s, delta, step;
+ tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1);
+ tree ref = TREE_OPERAND (addr, 0), pref;
+ tree ret, pos;
+ tree itype;
+
+ STRIP_NOPS (arg0);
+ STRIP_NOPS (arg1);
+
+ if (TREE_CODE (arg0) == INTEGER_CST)
+ {
+ s = arg0;
+ delta = arg1;
+ }
+ else if (TREE_CODE (arg1) == INTEGER_CST)
+ {
+ s = arg1;
+ delta = arg0;
+ }
+ else
+ return NULL_TREE;
+
+ for (;; ref = TREE_OPERAND (ref, 0))
+ {
+ if (TREE_CODE (ref) == ARRAY_REF)
+ {
+ step = array_ref_element_size (ref);
+
+ if (TREE_CODE (step) != INTEGER_CST)
+ continue;
+
+ itype = TREE_TYPE (step);
+
+ /* If the type sizes do not match, we might run into problems
+ when one of them would overflow. */
+ if (TYPE_PRECISION (itype) != TYPE_PRECISION (type))
+ continue;
+
+ if (!operand_equal_p (step, fold_convert (itype, s), 0))
+ continue;
+
+ delta = fold_convert (itype, delta);
+ break;
+ }
+
+ if (!handled_component_p (ref))
+ return NULL_TREE;
+ }
+
+ /* We found the suitable array reference. So copy everything up to it,
+ and replace the index. */
+
+ pref = TREE_OPERAND (addr, 0);
+ ret = copy_node (pref);
+ pos = ret;
+
+ while (pref != ref)
+ {
+ pref = TREE_OPERAND (pref, 0);
+ TREE_OPERAND (pos, 0) = copy_node (pref);
+ pos = TREE_OPERAND (pos, 0);
+ }
+
+ TREE_OPERAND (pos, 1) = fold (build2 (code, itype,
+ TREE_OPERAND (pos, 1),
+ delta));
+
+ return build1 (ADDR_EXPR, type, ret);
+}
+
/* Perform constant folding and related simplification of EXPR.
The related simplifications include x*1 => x, x*0 => 0, etc.,
and application of the associative law.
@@ -6602,6 +6680,24 @@ fold (tree expr)
alt0, alt1)),
same));
}
+
+ /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is step
+ of the array. Loop optimizer sometimes produce this type of
+ expressions. */
+ if (TREE_CODE (arg0) == ADDR_EXPR
+ && TREE_CODE (arg1) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, PLUS_EXPR, arg0, arg1);
+ if (tem)
+ return fold (tem);
+ }
+ else if (TREE_CODE (arg1) == ADDR_EXPR
+ && TREE_CODE (arg0) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, PLUS_EXPR, arg1, arg0);
+ if (tem)
+ return fold (tem);
+ }
}
else
{
@@ -6974,6 +7070,17 @@ fold (tree expr)
&diff))
return build_int_cst_type (type, diff);
}
+
+ /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is step
+ of the array. Loop optimizer sometimes produce this type of
+ expressions. */
+ if (TREE_CODE (arg0) == ADDR_EXPR
+ && TREE_CODE (arg1) == MULT_EXPR)
+ {
+ tem = try_move_mult_to_index (type, MINUS_EXPR, arg0, arg1);
+ if (tem)
+ return fold (tem);
+ }
if (TREE_CODE (arg0) == MULT_EXPR
&& TREE_CODE (arg1) == MULT_EXPR
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index a620aca..c0c800a7 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3603,20 +3603,32 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
bitmap act_inv = BITMAP_XMALLOC ();
unsigned i;
struct cost_pair *cp;
+ bitmap_iterator bi;
+ struct iv_cand *cand;
+ bitmap depends_on;
bitmap_copy (best_ivs, ivs);
bitmap_copy (best_inv, inv);
- for (i = 0; i < use->n_map_members; i++)
+ /* First try important candidates. Only if it fails, try the specific ones.
+ Rationale -- in loops with many variables the best choice often is to use
+ just one generic biv. If we added here many ivs specific to the uses,
+ the optimization algorithm later would be likely to get stuck in a local
+ minimum, thus causing us to create too many ivs. The approach from
+ few ivs to more seems more likely to be succesful -- starting from few
+ ivs, replacing an expensive use by a specific iv should always be a
+ win. */
+ EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
{
- cp = use->cost_map + i;
- if (cp->cost == INFTY)
+ cand = iv_cand (data, i);
+
+ if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
continue;
bitmap_copy (act_ivs, ivs);
- bitmap_set_bit (act_ivs, cp->cand->id);
- if (cp->depends_on)
- bitmap_a_or_b (act_inv, inv, cp->depends_on);
+ bitmap_set_bit (act_ivs, cand->id);
+ if (depends_on)
+ bitmap_a_or_b (act_inv, inv, depends_on);
else
bitmap_copy (act_inv, inv);
act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
@@ -3629,6 +3641,35 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
}
}
+ if (best_cost == INFTY)
+ {
+ for (i = 0; i < use->n_map_members; i++)
+ {
+ cp = use->cost_map + i;
+ if (cp->cost == INFTY)
+ continue;
+
+ /* Already tried this. */
+ if (cp->cand->important)
+ continue;
+
+ bitmap_copy (act_ivs, ivs);
+ bitmap_set_bit (act_ivs, cp->cand->id);
+ if (cp->depends_on)
+ bitmap_a_or_b (act_inv, inv, cp->depends_on);
+ else
+ bitmap_copy (act_inv, inv);
+ act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
+
+ if (act_cost < best_cost)
+ {
+ best_cost = act_cost;
+ bitmap_copy (best_ivs, act_ivs);
+ bitmap_copy (best_inv, act_inv);
+ }
+ }
+ }
+
bitmap_copy (ivs, best_ivs);
bitmap_copy (inv, best_inv);
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b879b9e..496b3f3 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -419,9 +419,10 @@ number_of_iterations_cond (tree type, tree base0, tree step0,
d = EXEC_BINARY (LSHIFT_EXPR, niter_type,
build_int_cst_type (niter_type, 1), bits);
s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits);
- bound = EXEC_BINARY (RSHIFT_EXPR, niter_type,
- build_int_cst (niter_type, -1),
- bits);
+
+ bound = build_low_bits_mask (niter_type,
+ (TYPE_PRECISION (niter_type)
+ - tree_low_cst (bits, 1)));
assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
assumption = fold (build2 (EQ_EXPR, boolean_type_node,
diff --git a/gcc/tree.c b/gcc/tree.c
index 18dec5e..9531e69 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -609,6 +609,40 @@ build_int_cst_wide (tree type, unsigned HOST_WIDE_INT low, HOST_WIDE_INT hi)
return t;
}
+/* Builds an integer constant in TYPE such that lowest BITS bits are ones
+ and the rest are zeros. */
+
+tree
+build_low_bits_mask (tree type, unsigned bits)
+{
+ unsigned HOST_WIDE_INT low;
+ HOST_WIDE_INT high;
+ unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0;
+
+ gcc_assert (bits <= TYPE_PRECISION (type));
+
+ if (bits == TYPE_PRECISION (type)
+ && !TYPE_UNSIGNED (type))
+ {
+ /* Sign extended all-ones mask. */
+ low = all_ones;
+ high = -1;
+ }
+ else if (bits <= HOST_BITS_PER_WIDE_INT)
+ {
+ low = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+ high = 0;
+ }
+ else
+ {
+ bits -= HOST_BITS_PER_WIDE_INT;
+ low = all_ones;
+ high = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+ }
+
+ return build_int_cst_wide (type, low, high);
+}
+
/* Checks that X is integer constant that can be expressed in (unsigned)
HOST_WIDE_INT without loss of precision. */
diff --git a/gcc/tree.h b/gcc/tree.h
index e0a8fc7..31a5903e 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3543,6 +3543,7 @@ tree fold_build_cleanup_point_expr (tree type, tree expr);
extern tree build_fold_addr_expr_with_type (tree, tree);
extern tree build_fold_indirect_ref (tree);
extern tree constant_boolean_node (int, tree);
+extern tree build_low_bits_mask (tree, unsigned);
extern bool tree_swap_operands_p (tree, tree, bool);
extern enum tree_code swap_tree_comparison (enum tree_code);