author     Richard Sandiford <richard.sandiford@linaro.org>  2018-01-13 18:02:10 +0000
committer  Richard Sandiford <rsandifo@gcc.gnu.org>  2018-01-13 18:02:10 +0000
commit     a57776a11369621f9e9e8a8a3db6cb406c8bf27b (patch)
tree       a9dd82784464ea1a418d7c88730dad9f05ef9840 /gcc/tree-vect-data-refs.c
parent     f307441ac4d58d5a1690081f95b63b70b3e90b48 (diff)
Support for aliasing with variable strides
This patch adds runtime alias checks for loops with variable strides,
so that we can vectorise them even without a restrict qualifier.
There are several parts to doing this:

1) For accesses like:

     x[i * n] += 1;

   we need to check whether n (and thus the DR_STEP) is nonzero.
   vect_analyze_data_ref_dependence records values that need to be
   checked in this way, then prune_runtime_alias_test_list records
   a bounds check on DR_STEP being outside the range [0, 0].

2) For accesses like:

     x[i * n] = x[i * n + 1] + 1;

   we simply need to test whether abs (n) >= 2.
   prune_runtime_alias_test_list looks for cases like this and tries
   to guess whether it is better to use this kind of check or a check
   for non-overlapping ranges.  (We could do an OR of the two
   conditions at runtime, but that isn't implemented yet.)

3) Checks for overlapping ranges need to cope with variable strides.
   At present the "length" of each segment in a range check is
   represented as an offset from the base that lies outside the
   touched range, in the same direction as DR_STEP.  The length can
   therefore be negative and is sometimes conservative.

   With variable steps it's easier to reason about if we split this
   into two:

     seg_len: distance travelled from the first iteration of interest
              to the last, e.g. DR_STEP * (VF - 1)

     access_size: the number of bytes accessed in each iteration

   with access_size always being a positive constant and seg_len
   possibly being variable.  We can then combine alias checks for two
   accesses that are a constant number of bytes apart by adjusting the
   access size to account for the gap.  This leaves the segment length
   unchanged, which allows the check to be combined with further
   accesses.

   When seg_len is positive, the runtime alias check has the form:

     base_a >= base_b + seg_len_b + access_size_b
     || base_b >= base_a + seg_len_a + access_size_a

   In many accesses the base will be aligned to the access size, which
   allows us to skip the addition:

     base_a > base_b + seg_len_b
     || base_b > base_a + seg_len_a

   A similar saving is possible with "negative" lengths.

   The patch therefore tracks the alignment in addition to seg_len and
   access_size.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* tree-vectorizer.h (vec_lower_bound): New structure.
	(_loop_vec_info): Add check_nonzero and lower_bounds.
	(LOOP_VINFO_CHECK_NONZERO): New macro.
	(LOOP_VINFO_LOWER_BOUNDS): Likewise.
	(LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Check lower_bounds too.
	* tree-data-ref.h (dr_with_seg_len): Add access_size and align
	fields.  Make seg_len the distance travelled, not including the
	access size.
	(dr_direction_indicator): Declare.
	(dr_zero_step_indicator): Likewise.
	(dr_known_forward_stride_p): Likewise.
	* tree-data-ref.c: Include stringpool.h, tree-vrp.h and
	tree-ssanames.h.
	(runtime_alias_check_p): Allow runtime alias checks with
	variable strides.
	(operator ==): Compare access_size and align.
	(prune_runtime_alias_test_list): Rework for new distinction
	between the access_size and seg_len.
	(create_intersect_range_checks_index): Likewise.  Cope with
	polynomial segment lengths.
	(get_segment_min_max): New function.
	(create_intersect_range_checks): Use it.
	(dr_step_indicator): New function.
	(dr_direction_indicator): Likewise.
	(dr_zero_step_indicator): Likewise.
	(dr_known_forward_stride_p): Likewise.
	* tree-loop-distribution.c (data_ref_segment_size): Return
	DR_STEP * (niters - 1).
	(compute_alias_check_pairs): Update call to the dr_with_seg_len
	constructor.
	* tree-vect-data-refs.c (vect_check_nonzero_value): New function.
	(vect_preserves_scalar_order_p): New function, split out from...
	(vect_analyze_data_ref_dependence): ...here.  Check for zero steps.
	(vect_vfa_segment_size): Return DR_STEP * (length_factor - 1).
	(vect_vfa_access_size): New function.
	(vect_vfa_align): Likewise.
	(vect_compile_time_alias): Take access_size_a and access_size_b
	arguments.
	(dump_lower_bound): New function.
	(vect_check_lower_bound): Likewise.
	(vect_small_gap_p): Likewise.
	(vectorizable_with_step_bound_p): Likewise.
	(vect_prune_runtime_alias_test_list): Ignore cross-iteration
	dependencies if the vectorization factor is 1.  Convert the
	checks for nonzero steps into checks on the bounds of DR_STEP.
	Try using a bounds check for variable steps if the minimum
	required step is relatively small.  Update calls to the
	dr_with_seg_len constructor and to vect_compile_time_alias.
	* tree-vect-loop-manip.c (vect_create_cond_for_lower_bounds): New
	function.
	(vect_loop_versioning): Call it.
	* tree-vect-loop.c (vect_analyze_loop_2): Clear
	LOOP_VINFO_LOWER_BOUNDS when retrying.
	(vect_estimate_min_profitable_iters): Account for any bounds
	checks.

gcc/testsuite/
	* gcc.dg/vect/bb-slp-cond-1.c: Expect loop vectorization rather
	than SLP vectorization.
	* gcc.dg/vect/vect-alias-check-10.c: New test.
	* gcc.dg/vect/vect-alias-check-11.c: Likewise.
	* gcc.dg/vect/vect-alias-check-12.c: Likewise.
	* gcc.dg/vect/vect-alias-check-8.c: Likewise.
	* gcc.dg/vect/vect-alias-check-9.c: Likewise.
	* gcc.target/aarch64/sve/strided_load_8.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_1.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_1.h: Likewise.
	* gcc.target/aarch64/sve/var_stride_1_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_2.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_2_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_3.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_3_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_4.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_4_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_5.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_5_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_6.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_6_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_7.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_7_run.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_8.c: Likewise.
	* gcc.target/aarch64/sve/var_stride_8_run.c: Likewise.
	* gfortran.dg/vect/vect-alias-check-1.F90: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256644
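[Editorial note] To make the motivation concrete, here is a sketch of a
loop that becomes vectorizable under this runtime versioning.  It is not
part of the patch and the function name is hypothetical:

  /* Without "restrict", the compiler cannot prove that the two
     accesses never overlap for every value of n.  With this patch the
     vectorizer can instead version the loop on a bounds check of the
     variable step: here it vectorizes when abs (n) >= 2, i.e. when
     the byte step n * 4 lies outside the range (-8, 8).  */
  void
  f (int *x, int n, int count)
  {
    for (int i = 0; i < count; ++i)
      x[i * n] = x[i * n + 1] + 1;
  }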
Diffstat (limited to 'gcc/tree-vect-data-refs.c')
-rw-r--r--  gcc/tree-vect-data-refs.c | 365
1 file changed, 320 insertions(+), 45 deletions(-)
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index c6bfe45..684b7c5 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -169,6 +169,50 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
return true;
}
+/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
+
+static void
+vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
+{
+ vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
+ for (unsigned int i = 0; i < checks.length (); ++i)
+ if (checks[i] == value)
+ return;
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "need run-time check that ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, value);
+ dump_printf (MSG_NOTE, " is nonzero\n");
+ }
+ LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
+}
+
+/* Return true if we know that the order of vectorized STMT_A and
+ vectorized STMT_B will be the same as the order of STMT_A and STMT_B.
+ At least one of the statements is a write. */
+
+static bool
+vect_preserves_scalar_order_p (gimple *stmt_a, gimple *stmt_b)
+{
+ stmt_vec_info stmtinfo_a = vinfo_for_stmt (stmt_a);
+ stmt_vec_info stmtinfo_b = vinfo_for_stmt (stmt_b);
+
+ /* Single statements are always kept in their original order. */
+ if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
+ && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
+ return true;
+
+ /* STMT_A and STMT_B belong to overlapping groups. All loads in a
+ group are emitted at the position of the first scalar load and all
+ stores in a group are emitted at the position of the last scalar store.
+ Thus writes will happen no earlier than their current position
+ (but could happen later) while reads will happen no later than their
+ current position (but could happen earlier). Reordering is therefore
+ only possible if the first access is a write. */
+ gimple *earlier_stmt = get_earlier_stmt (stmt_a, stmt_b);
+ return !DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)));
+}
/* A subroutine of vect_analyze_data_ref_dependence. Handle
DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
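[Editorial note] A sketch (not from the patch) of the reordering hazard
that vect_preserves_scalar_order_p guards against:

  /* Hypothetical example.  Per scalar iteration, the store to a[i]
     precedes the load from a[i + 1].  If both accesses end up in
     interleaving groups, stores are emitted at the position of the
     last scalar store and loads at the position of the first scalar
     load, so the later read could cross the earlier write;
     vect_preserves_scalar_order_p then returns false.  */
  void
  f (int *a, int *b, int *c, int n)
  {
    for (int i = 0; i < n - 1; ++i)
      {
        a[i] = b[i];      /* write: may only move later.    */
        c[i] = a[i + 1];  /* read: may only move earlier.   */
      }
  }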
@@ -414,22 +458,27 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
... = a[i];
a[i+1] = ...;
where loads from the group interleave with the store. */
- if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
- || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
+ if (!vect_preserves_scalar_order_p (DR_STMT (dra), DR_STMT (drb)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "READ_WRITE dependence in interleaving.\n");
+ return true;
+ }
+
+ if (!loop->force_vectorize)
{
- gimple *earlier_stmt;
- earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
- if (DR_IS_WRITE
- (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
+ tree indicator = dr_zero_step_indicator (dra);
+ if (TREE_CODE (indicator) != INTEGER_CST)
+ vect_check_nonzero_value (loop_vinfo, indicator);
+ else if (integer_zerop (indicator))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "READ_WRITE dependence in interleaving."
- "\n");
+ "access also has a zero step\n");
return true;
}
}
-
continue;
}
@@ -3030,38 +3079,57 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* Function vect_vfa_segment_size.
- Create an expression that computes the size of segment
- that will be accessed for a data reference. The functions takes into
- account that realignment loads may access one more vector.
-
Input:
DR: The data reference.
LENGTH_FACTOR: segment length to consider.
- Return an expression whose value is the size of segment which will be
- accessed by DR. */
+ Return a value suitable for the dr_with_seg_len::seg_len field.
+ This is the "distance travelled" by the pointer from the first
+ iteration in the segment to the last. Note that it does not include
+ the size of the access; in effect it only describes the first byte. */
static tree
vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
{
- tree segment_length;
+ length_factor = size_binop (MINUS_EXPR,
+ fold_convert (sizetype, length_factor),
+ size_one_node);
+ return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr)),
+ length_factor);
+}
- if (integer_zerop (DR_STEP (dr)))
- segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- else
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, length_factor));
+/* Return a value that, when added to abs (vect_vfa_segment_size (dr)),
+ gives the worst-case number of bytes covered by the segment. */
- if (vect_supportable_dr_alignment (dr, false)
- == dr_explicit_realign_optimized)
+static unsigned HOST_WIDE_INT
+vect_vfa_access_size (data_reference *dr)
+{
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (DR_STMT (dr));
+ tree ref_type = TREE_TYPE (DR_REF (dr));
+ unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
+ unsigned HOST_WIDE_INT access_size = ref_size;
+ if (GROUP_FIRST_ELEMENT (stmt_vinfo))
{
- tree vector_size = TYPE_SIZE_UNIT
- (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
-
- segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
+ gcc_assert (GROUP_FIRST_ELEMENT (stmt_vinfo) == DR_STMT (dr));
+ access_size *= GROUP_SIZE (stmt_vinfo) - GROUP_GAP (stmt_vinfo);
+ }
+ if (STMT_VINFO_VEC_STMT (stmt_vinfo)
+ && (vect_supportable_dr_alignment (dr, false)
+ == dr_explicit_realign_optimized))
+ {
+ /* We might access a full vector's worth. */
+ tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+ access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
}
- return segment_length;
+ return access_size;
+}
+
+/* Get the minimum alignment for all the scalar accesses that DR describes. */
+
+static unsigned int
+vect_vfa_align (const data_reference *dr)
+{
+ return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr)));
}
/* Function vect_no_alias_p.
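[Editorial note] A worked example of the new seg_len/access_size
representation, with assumed numbers: a contiguous int access with
DR_STEP == 4 bytes, vectorized with VF == 8, no grouping or realignment:

  seg_len     = DR_STEP * (VF - 1)    = 4 * 7  = 28 bytes
  access_size = sizeof (int)                   = 4 bytes
  bytes spanned = seg_len + access_size = 28 + 4 = 32 bytes

For a grouped access of GROUP_SIZE == 4 int elements with GROUP_GAP == 1,
vect_vfa_access_size instead gives 4 * (4 - 1) == 12 bytes per iteration,
and vect_vfa_align reports the 4-byte alignment of the scalar type.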
@@ -3069,13 +3137,15 @@ vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
Given data references A and B with equal base and offset, see whether
the alias relation can be decided at compilation time. Return 1 if
it can and the references alias, 0 if it can and the references do
- not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A
- and SEGMENT_LENGTH_B are the memory lengths accessed by A and B
- respectively. */
+ not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
+ SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
+ of dr_with_seg_len::{seg_len,access_size} for A and B. */
static int
vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
- tree segment_length_a, tree segment_length_b)
+ tree segment_length_a, tree segment_length_b,
+ unsigned HOST_WIDE_INT access_size_a,
+ unsigned HOST_WIDE_INT access_size_b)
{
poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
@@ -3088,18 +3158,21 @@ vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
{
const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
- offset_a = (offset_a + vect_get_scalar_dr_size (a)) - const_length_a;
+ offset_a = (offset_a + access_size_a) - const_length_a;
}
else
const_length_a = tree_to_poly_uint64 (segment_length_a);
if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
{
const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
- offset_b = (offset_b + vect_get_scalar_dr_size (b)) - const_length_b;
+ offset_b = (offset_b + access_size_b) - const_length_b;
}
else
const_length_b = tree_to_poly_uint64 (segment_length_b);
+ const_length_a += access_size_a;
+ const_length_b += access_size_b;
+
if (ranges_known_overlap_p (offset_a, const_length_a,
offset_b, const_length_b))
return 1;
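[Editorial note] Continuing the assumed numbers above: two
forward-stepping int accesses with constant DR_INITs 0 and 8, both with
segment length 28 and access size 4:

  const_length_a = 28 + 4 = 32  =>  bytes [0, 32)
  const_length_b = 28 + 4 = 32  =>  bytes [8, 40)

The ranges are known to overlap, so vect_compile_time_alias returns 1
(the references alias) without emitting any runtime check.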
@@ -3149,6 +3222,108 @@ dependence_distance_ge_vf (data_dependence_relation *ddr,
return true;
}
+/* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
+
+static void
+dump_lower_bound (int dump_kind, const vec_lower_bound &lower_bound)
+{
+ dump_printf (dump_kind, "%s (", lower_bound.unsigned_p ? "unsigned" : "abs");
+ dump_generic_expr (dump_kind, TDF_SLIM, lower_bound.expr);
+ dump_printf (dump_kind, ") >= ");
+ dump_dec (dump_kind, lower_bound.min_value);
+}
+
+/* Record that the vectorized loop requires the vec_lower_bound described
+ by EXPR, UNSIGNED_P and MIN_VALUE. */
+
+static void
+vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
+ poly_uint64 min_value)
+{
+ vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
+ for (unsigned int i = 0; i < lower_bounds.length (); ++i)
+ if (operand_equal_p (lower_bounds[i].expr, expr, 0))
+ {
+ unsigned_p &= lower_bounds[i].unsigned_p;
+ min_value = upper_bound (lower_bounds[i].min_value, min_value);
+ if (lower_bounds[i].unsigned_p != unsigned_p
+ || maybe_lt (lower_bounds[i].min_value, min_value))
+ {
+ lower_bounds[i].unsigned_p = unsigned_p;
+ lower_bounds[i].min_value = min_value;
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "updating run-time check to ");
+ dump_lower_bound (MSG_NOTE, lower_bounds[i]);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ }
+ return;
+ }
+
+ vec_lower_bound lower_bound (expr, unsigned_p, min_value);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
+ dump_lower_bound (MSG_NOTE, lower_bound);
+ dump_printf (MSG_NOTE, "\n");
+ }
+ LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
+}
+
+/* Return true if it's unlikely that the step of the vectorized form of DR
+ will span fewer than GAP bytes. */
+
+static bool
+vect_small_gap_p (loop_vec_info loop_vinfo, data_reference *dr, poly_int64 gap)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
+ HOST_WIDE_INT count
+ = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ if (GROUP_FIRST_ELEMENT (stmt_info))
+ count *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+ return estimated_poly_value (gap) <= count * vect_get_scalar_dr_size (dr);
+}
+
+/* Return true if we know that there is no alias between DR_A and DR_B
+ when abs (DR_STEP (DR_A)) >= N for some N. When returning true, set
+ *LOWER_BOUND_OUT to this N. */
+
+static bool
+vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b,
+ poly_uint64 *lower_bound_out)
+{
+ /* Check that there is a constant gap of known sign between DR_A
+ and DR_B. */
+ poly_int64 init_a, init_b;
+ if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
+ || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
+ || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
+ || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
+ || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
+ || !ordered_p (init_a, init_b))
+ return false;
+
+ /* Sort DR_A and DR_B by the address they access. */
+ if (maybe_lt (init_b, init_a))
+ {
+ std::swap (init_a, init_b);
+ std::swap (dr_a, dr_b);
+ }
+
+ /* If the two accesses could be dependent within a scalar iteration,
+ make sure that we'd retain their order. */
+ if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_a), init_b)
+ && !vect_preserves_scalar_order_p (DR_STMT (dr_a), DR_STMT (dr_b)))
+ return false;
+
+ /* There is no alias if abs (DR_STEP) is greater than or equal to
+ the bytes spanned by the combination of the two accesses. */
+ *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_b) - init_a;
+ return true;
+}
+
/* Function vect_prune_runtime_alias_test_list.
Prune a list of ddrs to be tested at run-time by versioning for alias.
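[Editorial note] The two helpers above implement the abs (step)
heuristic from the covering note.  For the loop
x[i * n] = x[i * n + 1] + 1 (a worked example, assuming a 4-byte int
and an estimated VF of 8):

  vectorizable_with_step_bound_p: init_a = 0, init_b = 4, so
    lower_bound = init_b + access size - init_a = 4 + 4 - 0 = 8,
    i.e. no alias when abs (DR_STEP) = abs (n) * 4 >= 8 (abs (n) >= 2).

  vect_small_gap_p: the vectorized step spans roughly
    VF * scalar size = 8 * 4 = 32 bytes, so a required bound of 8
    counts as "small" and the cheap bounds check on the step is
    preferred over a full overlap test.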
@@ -3178,6 +3353,19 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_NOTE, vect_location,
"=== vect_prune_runtime_alias_test_list ===\n");
+ /* Step values are irrelevant for aliasing if the number of vector
+ iterations is equal to the number of scalar iterations (which can
+ happen for fully-SLP loops). */
+ bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
+
+ if (!ignore_step_p)
+ {
+ /* Convert the checks for nonzero steps into bound tests. */
+ tree value;
+ FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
+ vect_check_lower_bound (loop_vinfo, value, true, 1);
+ }
+
if (may_alias_ddrs.is_empty ())
return true;
@@ -3191,9 +3379,12 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
{
int comp_res;
+ poly_uint64 lower_bound;
struct data_reference *dr_a, *dr_b;
gimple *dr_group_first_a, *dr_group_first_b;
tree segment_length_a, segment_length_b;
+ unsigned HOST_WIDE_INT access_size_a, access_size_b;
+ unsigned int align_a, align_b;
gimple *stmt_a, *stmt_b;
/* Ignore the alias if the VF we chose ended up being no greater
@@ -3221,6 +3412,64 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
dr_a = DDR_A (ddr);
stmt_a = DR_STMT (DDR_A (ddr));
+
+ dr_b = DDR_B (ddr);
+ stmt_b = DR_STMT (DDR_B (ddr));
+
+ /* Skip the pair if inter-iteration dependencies are irrelevant
+ and intra-iteration dependencies are guaranteed to be honored. */
+ if (ignore_step_p
+ && (vect_preserves_scalar_order_p (stmt_a, stmt_b)
+ || vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "no need for alias check between ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ dump_printf (MSG_NOTE, " when VF is 1\n");
+ }
+ continue;
+ }
+
+ /* See whether we can handle the alias using a bounds check on
+ the step, and whether that's likely to be the best approach.
+ (It might not be, for example, if the minimum step is much larger
+ than the number of bytes handled by one vector iteration.) */
+ if (!ignore_step_p
+ && TREE_CODE (DR_STEP (dr_a)) != INTEGER_CST
+ && vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound)
+ && (vect_small_gap_p (loop_vinfo, dr_a, lower_bound)
+ || vect_small_gap_p (loop_vinfo, dr_b, lower_bound)))
+ {
+ bool unsigned_p = dr_known_forward_stride_p (dr_a);
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "no alias between ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ dump_printf (MSG_NOTE, " when the step ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_STEP (dr_a));
+ dump_printf (MSG_NOTE, " is outside ");
+ if (unsigned_p)
+ dump_printf (MSG_NOTE, "[0");
+ else
+ {
+ dump_printf (MSG_NOTE, "(");
+ dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
+ }
+ dump_printf (MSG_NOTE, ", ");
+ dump_dec (MSG_NOTE, lower_bound);
+ dump_printf (MSG_NOTE, ")\n");
+ }
+ vect_check_lower_bound (loop_vinfo, DR_STEP (dr_a), unsigned_p,
+ lower_bound);
+ continue;
+ }
+
dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
if (dr_group_first_a)
{
@@ -3228,8 +3477,6 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
}
- dr_b = DDR_B (ddr);
- stmt_b = DR_STMT (DDR_B (ddr));
dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
if (dr_group_first_b)
{
@@ -3237,12 +3484,24 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
}
- if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
- length_factor = scalar_loop_iters;
+ if (ignore_step_p)
+ {
+ segment_length_a = size_zero_node;
+ segment_length_b = size_zero_node;
+ }
else
- length_factor = size_int (vect_factor);
- segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
- segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+ {
+ if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
+ length_factor = scalar_loop_iters;
+ else
+ length_factor = size_int (vect_factor);
+ segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
+ segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
+ }
+ access_size_a = vect_vfa_access_size (dr_a);
+ access_size_b = vect_vfa_access_size (dr_b);
+ align_a = vect_vfa_align (dr_a);
+ align_b = vect_vfa_align (dr_b);
comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
DR_BASE_ADDRESS (dr_b));
@@ -3259,7 +3518,22 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
int res = vect_compile_time_alias (dr_a, dr_b,
segment_length_a,
- segment_length_b);
+ segment_length_b,
+ access_size_a,
+ access_size_b);
+ if (res >= 0 && dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "can tell at compile time that ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a));
+ dump_printf (MSG_NOTE, " and ");
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b));
+ if (res == 0)
+ dump_printf (MSG_NOTE, " do not alias\n");
+ else
+ dump_printf (MSG_NOTE, " alias\n");
+ }
+
if (res == 0)
continue;
@@ -3273,8 +3547,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
}
dr_with_seg_len_pair_t dr_with_seg_len_pair
- (dr_with_seg_len (dr_a, segment_length_a),
- dr_with_seg_len (dr_b, segment_length_b));
+ (dr_with_seg_len (dr_a, segment_length_a, access_size_a, align_a),
+ dr_with_seg_len (dr_b, segment_length_b, access_size_b, align_b));
/* Canonicalize pairs by sorting the two DR members. */
if (comp_res > 0)
@@ -3287,6 +3561,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
unsigned int count = (comp_alias_ddrs.length ()
+ check_unequal_addrs.length ());
+
dump_printf_loc (MSG_NOTE, vect_location,
"improved number of alias checks from %d to %d\n",
may_alias_ddrs.length (), count);