about summary refs log tree commit diff
path: root/gcc/tree-ssa-loop-niter.cc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/tree-ssa-loop-niter.cc')
-rw-r--r--  gcc/tree-ssa-loop-niter.cc  |  412
1 files changed, 404 insertions, 8 deletions
diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
index 58a9d05..65b9604 100644
--- a/gcc/tree-ssa-loop-niter.cc
+++ b/gcc/tree-ssa-loop-niter.cc
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-dfa.h"
+#include "internal-fn.h"
#include "gimple-range.h"
@@ -2032,11 +2033,18 @@ static tree
build_popcount_expr (tree src)
{
tree fn;
+ bool use_ifn = false;
int prec = TYPE_PRECISION (TREE_TYPE (src));
int i_prec = TYPE_PRECISION (integer_type_node);
int li_prec = TYPE_PRECISION (long_integer_type_node);
int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
- if (prec <= i_prec)
+
+ tree utype = unsigned_type_for (TREE_TYPE (src));
+ src = fold_convert (utype, src);
+
+ if (direct_internal_fn_supported_p (IFN_POPCOUNT, utype, OPTIMIZE_FOR_BOTH))
+ use_ifn = true;
+ else if (prec <= i_prec)
fn = builtin_decl_implicit (BUILT_IN_POPCOUNT);
else if (prec == li_prec)
fn = builtin_decl_implicit (BUILT_IN_POPCOUNTL);
@@ -2045,12 +2053,11 @@ build_popcount_expr (tree src)
else
return NULL_TREE;
- tree utype = unsigned_type_for (TREE_TYPE (src));
- src = fold_convert (utype, src);
- if (prec < i_prec)
- src = fold_convert (unsigned_type_node, src);
tree call;
- if (prec == 2 * lli_prec)
+ if (use_ifn)
+ call = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_POPCOUNT,
+ integer_type_node, 1, src);
+ else if (prec == 2 * lli_prec)
{
tree src1 = fold_convert (long_long_unsigned_type_node,
fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
@@ -2063,7 +2070,12 @@ build_popcount_expr (tree src)
call = fold_build2 (PLUS_EXPR, integer_type_node, call1, call2);
}
else
- call = build_call_expr (fn, 1, src);
+ {
+ if (prec < i_prec)
+ src = fold_convert (unsigned_type_node, src);
+
+ call = build_call_expr (fn, 1, src);
+ }
return call;
}
@@ -2198,6 +2210,385 @@ number_of_iterations_popcount (loop_p loop, edge exit,
return true;
}
+/* Return an expression that counts the leading/trailing zeroes of src.
+
+ If define_at_zero is true, then the built expression will be defined to
+ return the precision of src when src == 0 (using either a conditional
+ expression or a suitable internal function).
+ Otherwise, we can elide the conditional expression and let src = 0 invoke
+ undefined behaviour. */
+
+static tree
+build_cltz_expr (tree src, bool leading, bool define_at_zero)
+{
+ tree fn;
+ internal_fn ifn = leading ? IFN_CLZ : IFN_CTZ;
+ bool use_ifn = false;
+ int prec = TYPE_PRECISION (TREE_TYPE (src));
+ int i_prec = TYPE_PRECISION (integer_type_node);
+ int li_prec = TYPE_PRECISION (long_integer_type_node);
+ int lli_prec = TYPE_PRECISION (long_long_integer_type_node);
+
+ tree utype = unsigned_type_for (TREE_TYPE (src));
+ src = fold_convert (utype, src);
+
+ if (direct_internal_fn_supported_p (ifn, utype, OPTIMIZE_FOR_BOTH))
+ use_ifn = true;
+ else if (prec <= i_prec)
+ fn = leading ? builtin_decl_implicit (BUILT_IN_CLZ)
+ : builtin_decl_implicit (BUILT_IN_CTZ);
+ else if (prec == li_prec)
+ fn = leading ? builtin_decl_implicit (BUILT_IN_CLZL)
+ : builtin_decl_implicit (BUILT_IN_CTZL);
+ else if (prec == lli_prec || prec == 2 * lli_prec)
+ fn = leading ? builtin_decl_implicit (BUILT_IN_CLZLL)
+ : builtin_decl_implicit (BUILT_IN_CTZLL);
+ else
+ return NULL_TREE;
+
+ tree call;
+ if (use_ifn)
+ {
+ call = build_call_expr_internal_loc (UNKNOWN_LOCATION, ifn,
+ integer_type_node, 1, src);
+ int val;
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (utype);
+ int optab_defined_at_zero
+ = leading ? CLZ_DEFINED_VALUE_AT_ZERO (mode, val)
+ : CTZ_DEFINED_VALUE_AT_ZERO (mode, val);
+ if (define_at_zero && !(optab_defined_at_zero == 2 && val == prec))
+ {
+ tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
+ call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
+ build_int_cst (integer_type_node, prec));
+ }
+ }
+ else if (prec == 2 * lli_prec)
+ {
+ tree src1 = fold_convert (long_long_unsigned_type_node,
+ fold_build2 (RSHIFT_EXPR, TREE_TYPE (src),
+ unshare_expr (src),
+ build_int_cst (integer_type_node,
+ lli_prec)));
+ tree src2 = fold_convert (long_long_unsigned_type_node, src);
+ /* We count the zeroes in src1, and add the number in src2 when src1
+ is 0. */
+ if (!leading)
+ std::swap(src1, src2);
+ tree call1 = build_call_expr (fn, 1, src1);
+ tree call2 = build_call_expr (fn, 1, src2);
+ if (define_at_zero)
+ {
+ tree is_zero2 = fold_build2 (NE_EXPR, boolean_type_node, src2,
+ build_zero_cst (TREE_TYPE (src2)));
+ call2 = fold_build3(COND_EXPR, integer_type_node, is_zero2, call2,
+ build_int_cst (integer_type_node, lli_prec));
+ }
+ tree is_zero1 = fold_build2 (NE_EXPR, boolean_type_node, src1,
+ build_zero_cst (TREE_TYPE (src1)));
+ call = fold_build3(COND_EXPR, integer_type_node, is_zero1, call1,
+ fold_build2 (PLUS_EXPR, integer_type_node, call2,
+ build_int_cst (integer_type_node,
+ lli_prec)));
+ }
+ else
+ {
+ if (prec < i_prec)
+ src = fold_convert (unsigned_type_node, src);
+
+ call = build_call_expr (fn, 1, src);
+ if (define_at_zero)
+ {
+ tree is_zero = fold_build2 (NE_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
+ call = fold_build3(COND_EXPR, integer_type_node, is_zero, call,
+ build_int_cst (integer_type_node, prec));
+ }
+
+ if (leading && prec < i_prec)
+ call = fold_build2(MINUS_EXPR, integer_type_node, call,
+ build_int_cst (integer_type_node,
+ i_prec - prec));
+ }
+
+ return call;
+}
+
+/* See comment below for number_of_iterations_bitcount.
+ For c[lt]z, we have:
+
+ modify:
+ iv_2 = iv_1 << 1 OR iv_1 >> 1
+
+ test:
+ if (iv & 1 << (prec-1)) OR (iv & 1)
+
+ modification count:
+ src precision - c[lt]z (src)
+
+ */
+
+static bool
+number_of_iterations_cltz (loop_p loop, edge exit,
+ enum tree_code code,
+ class tree_niter_desc *niter)
+{
+ bool modify_before_test = true;
+ HOST_WIDE_INT max;
+ int checked_bit;
+ tree iv_2;
+
+ /* Check that condition for staying inside the loop is like
+ if (iv == 0). */
+ gimple *cond_stmt = last_stmt (exit->src);
+ if (!cond_stmt
+ || gimple_code (cond_stmt) != GIMPLE_COND
+ || (code != EQ_EXPR && code != GE_EXPR)
+ || !integer_zerop (gimple_cond_rhs (cond_stmt))
+ || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
+ return false;
+
+ if (code == EQ_EXPR)
+ {
+ /* Make sure we check a bitwise and with a suitable constant */
+ gimple *and_stmt = SSA_NAME_DEF_STMT (gimple_cond_lhs (cond_stmt));
+ if (!is_gimple_assign (and_stmt)
+ || gimple_assign_rhs_code (and_stmt) != BIT_AND_EXPR
+ || !integer_pow2p (gimple_assign_rhs2 (and_stmt)))
+ return false;
+
+ checked_bit = tree_log2 (gimple_assign_rhs2 (and_stmt));
+
+ iv_2 = gimple_assign_rhs1 (and_stmt);
+ }
+ else
+ {
+ /* We have a GE_EXPR - a signed comparison with zero is equivalent to
+ testing the leading bit, so check for this pattern too. */
+
+ iv_2 = gimple_cond_lhs (cond_stmt);
+ tree test_value_type = TREE_TYPE (iv_2);
+
+ if (TYPE_UNSIGNED (test_value_type))
+ return false;
+
+ gimple *test_value_stmt = SSA_NAME_DEF_STMT (iv_2);
+
+ if (is_gimple_assign (test_value_stmt)
+ && gimple_assign_rhs_code (test_value_stmt) == NOP_EXPR)
+ {
+ /* If the test value comes from a NOP_EXPR, then we need to unwrap
+ this. We conservatively require that both types have the same
+ precision. */
+ iv_2 = gimple_assign_rhs1 (test_value_stmt);
+ tree rhs_type = TREE_TYPE (iv_2);
+ if (TREE_CODE (rhs_type) != INTEGER_TYPE
+ || (TYPE_PRECISION (rhs_type)
+ != TYPE_PRECISION (test_value_type)))
+ return false;
+ }
+
+ checked_bit = TYPE_PRECISION (test_value_type) - 1;
+ }
+
+ gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+
+ /* If the test comes before the iv modification, then these will actually be
+ iv_1 and a phi node. */
+ if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+ && gimple_bb (iv_2_stmt) == loop->header
+ && gimple_phi_num_args (iv_2_stmt) == 2
+ && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
+ loop_latch_edge (loop)->dest_idx))
+ == SSA_NAME))
+ {
+ /* iv_2 is actually one of the inputs to the phi. */
+ iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+ iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+ modify_before_test = false;
+ }
+
+ /* Make sure iv_2_stmt is a logical shift by one stmt:
+ iv_2 = iv_1 {<<|>>} 1 */
+ if (!is_gimple_assign (iv_2_stmt)
+ || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
+ && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
+ || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
+ || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
+ return false;
+
+ bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
+
+ tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+
+ /* Check the recurrence. */
+ gimple *phi = SSA_NAME_DEF_STMT (iv_1);
+ if (gimple_code (phi) != GIMPLE_PHI
+ || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
+ || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+ return false;
+
+ /* We found a match. */
+ tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
+ int src_precision = TYPE_PRECISION (TREE_TYPE (src));
+
+ /* Apply any needed preprocessing to src. */
+ int num_ignored_bits;
+ if (left_shift)
+ num_ignored_bits = src_precision - checked_bit - 1;
+ else
+ num_ignored_bits = checked_bit;
+
+ if (modify_before_test)
+ num_ignored_bits++;
+
+ if (num_ignored_bits != 0)
+ src = fold_build2 (left_shift ? LSHIFT_EXPR : RSHIFT_EXPR,
+ TREE_TYPE (src), src,
+ build_int_cst (integer_type_node, num_ignored_bits));
+
+ /* Get the corresponding c[lt]z builtin. */
+ tree expr = build_cltz_expr (src, left_shift, false);
+
+ if (!expr)
+ return false;
+
+ max = src_precision - num_ignored_bits - 1;
+
+ expr = fold_convert (unsigned_type_node, expr);
+
+ tree assumptions = fold_build2 (NE_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
+
+ niter->assumptions = simplify_using_initial_conditions (loop, assumptions);
+ niter->may_be_zero = boolean_false_node;
+ niter->niter = simplify_using_initial_conditions (loop, expr);
+
+ if (TREE_CODE (niter->niter) == INTEGER_CST)
+ niter->max = tree_to_uhwi (niter->niter);
+ else
+ niter->max = max;
+
+ niter->bound = NULL_TREE;
+ niter->cmp = ERROR_MARK;
+
+ return true;
+}
+
+/* See comment below for number_of_iterations_bitcount.
+ For c[lt]z complement, we have:
+
+ modify:
+ iv_2 = iv_1 >> 1 OR iv_1 << 1
+
+ test:
+ if (iv != 0)
+
+ modification count:
+ src precision - c[lt]z (src)
+
+ */
+
+static bool
+number_of_iterations_cltz_complement (loop_p loop, edge exit,
+ enum tree_code code,
+ class tree_niter_desc *niter)
+{
+ bool modify_before_test = true;
+ HOST_WIDE_INT max;
+
+ /* Check that condition for staying inside the loop is like
+ if (iv != 0). */
+ gimple *cond_stmt = last_stmt (exit->src);
+ if (!cond_stmt
+ || gimple_code (cond_stmt) != GIMPLE_COND
+ || code != NE_EXPR
+ || !integer_zerop (gimple_cond_rhs (cond_stmt))
+ || TREE_CODE (gimple_cond_lhs (cond_stmt)) != SSA_NAME)
+ return false;
+
+ tree iv_2 = gimple_cond_lhs (cond_stmt);
+ gimple *iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+
+ /* If the test comes before the iv modification, then these will actually be
+ iv_1 and a phi node. */
+ if (gimple_code (iv_2_stmt) == GIMPLE_PHI
+ && gimple_bb (iv_2_stmt) == loop->header
+ && gimple_phi_num_args (iv_2_stmt) == 2
+ && (TREE_CODE (gimple_phi_arg_def (iv_2_stmt,
+ loop_latch_edge (loop)->dest_idx))
+ == SSA_NAME))
+ {
+ /* iv_2 is actually one of the inputs to the phi. */
+ iv_2 = gimple_phi_arg_def (iv_2_stmt, loop_latch_edge (loop)->dest_idx);
+ iv_2_stmt = SSA_NAME_DEF_STMT (iv_2);
+ modify_before_test = false;
+ }
+
+ /* Make sure iv_2_stmt is a logical shift by one stmt:
+ iv_2 = iv_1 {>>|<<} 1 */
+ if (!is_gimple_assign (iv_2_stmt)
+ || (gimple_assign_rhs_code (iv_2_stmt) != LSHIFT_EXPR
+ && (gimple_assign_rhs_code (iv_2_stmt) != RSHIFT_EXPR
+ || !TYPE_UNSIGNED (TREE_TYPE (gimple_assign_lhs (iv_2_stmt)))))
+ || !integer_onep (gimple_assign_rhs2 (iv_2_stmt)))
+ return false;
+
+ bool left_shift = (gimple_assign_rhs_code (iv_2_stmt) == LSHIFT_EXPR);
+
+ tree iv_1 = gimple_assign_rhs1 (iv_2_stmt);
+
+ /* Check the recurrence. */
+ gimple *phi = SSA_NAME_DEF_STMT (iv_1);
+ if (gimple_code (phi) != GIMPLE_PHI
+ || (gimple_bb (phi) != loop_latch_edge (loop)->dest)
+ || (iv_2 != gimple_phi_arg_def (phi, loop_latch_edge (loop)->dest_idx)))
+ return false;
+
+ /* We found a match. */
+ tree src = gimple_phi_arg_def (phi, loop_preheader_edge (loop)->dest_idx);
+ int src_precision = TYPE_PRECISION (TREE_TYPE (src));
+
+ /* Get the corresponding c[lt]z builtin. */
+ tree expr = build_cltz_expr (src, !left_shift, true);
+
+ if (!expr)
+ return false;
+
+ expr = fold_build2 (MINUS_EXPR, integer_type_node,
+ build_int_cst (integer_type_node, src_precision),
+ expr);
+
+ max = src_precision;
+
+ tree may_be_zero = boolean_false_node;
+
+ if (modify_before_test)
+ {
+ expr = fold_build2 (MINUS_EXPR, integer_type_node, expr,
+ integer_one_node);
+ max = max - 1;
+ may_be_zero = fold_build2 (EQ_EXPR, boolean_type_node, src,
+ build_zero_cst (TREE_TYPE (src)));
+ }
+
+ expr = fold_convert (unsigned_type_node, expr);
+
+ niter->assumptions = boolean_true_node;
+ niter->may_be_zero = simplify_using_initial_conditions (loop, may_be_zero);
+ niter->niter = simplify_using_initial_conditions (loop, expr);
+
+ if (TREE_CODE (niter->niter) == INTEGER_CST)
+ niter->max = tree_to_uhwi (niter->niter);
+ else
+ niter->max = max;
+
+ niter->bound = NULL_TREE;
+ niter->cmp = ERROR_MARK;
+ return true;
+}
+
/* See if LOOP contains a bit counting idiom. The idiom consists of two parts:
1. A modification to the induction variable.
2. A test to determine whether or not to exit the loop.
@@ -2244,7 +2635,9 @@ number_of_iterations_bitcount (loop_p loop, edge exit,
enum tree_code code,
class tree_niter_desc *niter)
{
- return number_of_iterations_popcount (loop, exit, code, niter);
+ return (number_of_iterations_popcount (loop, exit, code, niter)
+ || number_of_iterations_cltz (loop, exit, code, niter)
+ || number_of_iterations_cltz_complement (loop, exit, code, niter));
}
/* Substitute NEW_TREE for OLD in EXPR and fold the result.
@@ -2772,6 +3165,9 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
case NE_EXPR:
break;
+ case EQ_EXPR:
+ return number_of_iterations_cltz (loop, exit, code, niter);
+
default:
return false;
}