aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-loop-distribution.c
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2018-01-13 18:02:10 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2018-01-13 18:02:10 +0000
commita57776a11369621f9e9e8a8a3db6cb406c8bf27b (patch)
treea9dd82784464ea1a418d7c88730dad9f05ef9840 /gcc/tree-loop-distribution.c
parentf307441ac4d58d5a1690081f95b63b70b3e90b48 (diff)
downloadgcc-a57776a11369621f9e9e8a8a3db6cb406c8bf27b.zip
gcc-a57776a11369621f9e9e8a8a3db6cb406c8bf27b.tar.gz
gcc-a57776a11369621f9e9e8a8a3db6cb406c8bf27b.tar.bz2
Support for aliasing with variable strides
This patch adds runtime alias checks for loops with variable strides, so that we can vectorise them even without a restrict qualifier. There are several parts to doing this: 1) For accesses like: x[i * n] += 1; we need to check whether n (and thus the DR_STEP) is nonzero. vect_analyze_data_ref_dependence records values that need to be checked in this way, then prune_runtime_alias_test_list records a bounds check on DR_STEP being outside the range [0, 0]. 2) For accesses like: x[i * n] = x[i * n + 1] + 1; we simply need to test whether abs (n) >= 2. prune_runtime_alias_test_list looks for cases like this and tries to guess whether it is better to use this kind of check or a check for non-overlapping ranges. (We could do an OR of the two conditions at runtime, but that isn't implemented yet.) 3) Checks for overlapping ranges need to cope with variable strides. At present the "length" of each segment in a range check is represented as an offset from the base that lies outside the touched range, in the same direction as DR_STEP. The length can therefore be negative and is sometimes conservative. With variable steps it's easier to reaon about if we split this into two: seg_len: distance travelled from the first iteration of interest to the last, e.g. DR_STEP * (VF - 1) access_size: the number of bytes accessed in each iteration with access_size always being a positive constant and seg_len possibly being variable. We can then combine alias checks for two accesses that are a constant number of bytes apart by adjusting the access size to account for the gap. This leaves the segment length unchanged, which allows the check to be combined with further accesses. When seg_len is positive, the runtime alias check has the form: base_a >= base_b + seg_len_b + access_size_b || base_b >= base_a + seg_len_a + access_size_a In many accesses the base will be aligned to the access size, which allows us to skip the addition: base_a > base_b + seg_len_b || base_b > base_a + seg_len_a A similar saving is possible with "negative" lengths. The patch therefore tracks the alignment in addition to seg_len and access_size. 2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * tree-vectorizer.h (vec_lower_bound): New structure. (_loop_vec_info): Add check_nonzero and lower_bounds. (LOOP_VINFO_CHECK_NONZERO): New macro. (LOOP_VINFO_LOWER_BOUNDS): Likewise. (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Check lower_bounds too. * tree-data-ref.h (dr_with_seg_len): Add access_size and align fields. Make seg_len the distance travelled, not including the access size. (dr_direction_indicator): Declare. (dr_zero_step_indicator): Likewise. (dr_known_forward_stride_p): Likewise. * tree-data-ref.c: Include stringpool.h, tree-vrp.h and tree-ssanames.h. (runtime_alias_check_p): Allow runtime alias checks with variable strides. (operator ==): Compare access_size and align. (prune_runtime_alias_test_list): Rework for new distinction between the access_size and seg_len. (create_intersect_range_checks_index): Likewise. Cope with polynomial segment lengths. (get_segment_min_max): New function. (create_intersect_range_checks): Use it. (dr_step_indicator): New function. (dr_direction_indicator): Likewise. (dr_zero_step_indicator): Likewise. (dr_known_forward_stride_p): Likewise. * tree-loop-distribution.c (data_ref_segment_size): Return DR_STEP * (niters - 1). (compute_alias_check_pairs): Update call to the dr_with_seg_len constructor. * tree-vect-data-refs.c (vect_check_nonzero_value): New function. (vect_preserves_scalar_order_p): New function, split out from... (vect_analyze_data_ref_dependence): ...here. Check for zero steps. (vect_vfa_segment_size): Return DR_STEP * (length_factor - 1). (vect_vfa_access_size): New function. (vect_vfa_align): Likewise. (vect_compile_time_alias): Take access_size_a and access_b arguments. (dump_lower_bound): New function. (vect_check_lower_bound): Likewise. (vect_small_gap_p): Likewise. (vectorizable_with_step_bound_p): Likewise. (vect_prune_runtime_alias_test_list): Ignore cross-iteration depencies if the vectorization factor is 1. Convert the checks for nonzero steps into checks on the bounds of DR_STEP. Try using a bunds check for variable steps if the minimum required step is relatively small. Update calls to the dr_with_seg_len constructor and to vect_compile_time_alias. * tree-vect-loop-manip.c (vect_create_cond_for_lower_bounds): New function. (vect_loop_versioning): Call it. * tree-vect-loop.c (vect_analyze_loop_2): Clear LOOP_VINFO_LOWER_BOUNDS when retrying. (vect_estimate_min_profitable_iters): Account for any bounds checks. gcc/testsuite/ * gcc.dg/vect/bb-slp-cond-1.c: Expect loop vectorization rather than SLP vectorization. * gcc.dg/vect/vect-alias-check-10.c: New test. * gcc.dg/vect/vect-alias-check-11.c: Likewise. * gcc.dg/vect/vect-alias-check-12.c: Likewise. * gcc.dg/vect/vect-alias-check-8.c: Likewise. * gcc.dg/vect/vect-alias-check-9.c: Likewise. * gcc.target/aarch64/sve/strided_load_8.c: Likewise. * gcc.target/aarch64/sve/var_stride_1.c: Likewise. * gcc.target/aarch64/sve/var_stride_1.h: Likewise. * gcc.target/aarch64/sve/var_stride_1_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_2.c: Likewise. * gcc.target/aarch64/sve/var_stride_2_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_3.c: Likewise. * gcc.target/aarch64/sve/var_stride_3_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_4.c: Likewise. * gcc.target/aarch64/sve/var_stride_4_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_5.c: Likewise. * gcc.target/aarch64/sve/var_stride_5_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_6.c: Likewise. * gcc.target/aarch64/sve/var_stride_6_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_7.c: Likewise. * gcc.target/aarch64/sve/var_stride_7_run.c: Likewise. * gcc.target/aarch64/sve/var_stride_8.c: Likewise. * gcc.target/aarch64/sve/var_stride_8_run.c: Likewise. * gfortran.dg/vect/vect-alias-check-1.F90: Likewise. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r256644
Diffstat (limited to 'gcc/tree-loop-distribution.c')
-rw-r--r--gcc/tree-loop-distribution.c27
1 files changed, 15 insertions, 12 deletions
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index eca8dd2..a3d76e4 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -2330,16 +2330,12 @@ break_alias_scc_partitions (struct graph *rdg,
static tree
data_ref_segment_size (struct data_reference *dr, tree niters)
{
- tree segment_length;
-
- if (integer_zerop (DR_STEP (dr)))
- segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
- else
- segment_length = size_binop (MULT_EXPR,
- fold_convert (sizetype, DR_STEP (dr)),
- fold_convert (sizetype, niters));
-
- return segment_length;
+ niters = size_binop (MINUS_EXPR,
+ fold_convert (sizetype, niters),
+ size_one_node);
+ return size_binop (MULT_EXPR,
+ fold_convert (sizetype, DR_STEP (dr)),
+ fold_convert (sizetype, niters));
}
/* Return true if LOOP's latch is dominated by statement for data reference
@@ -2394,9 +2390,16 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
else
seg_length_b = data_ref_segment_size (dr_b, niters);
+ unsigned HOST_WIDE_INT access_size_a
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a))));
+ unsigned HOST_WIDE_INT access_size_b
+ = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b))));
+ unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
+ unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
+
dr_with_seg_len_pair_t dr_with_seg_len_pair
- (dr_with_seg_len (dr_a, seg_length_a),
- dr_with_seg_len (dr_b, seg_length_b));
+ (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
+ dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
/* Canonicalize pairs by sorting the two DR members. */
if (comp_res > 0)