diff options
Diffstat (limited to 'gcc')
34 files changed, 1720 insertions, 245 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2c6f9eb..37656fa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -2,6 +2,64 @@ Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> + * tree-vectorizer.h (vec_lower_bound): New structure. + (_loop_vec_info): Add check_nonzero and lower_bounds. + (LOOP_VINFO_CHECK_NONZERO): New macro. + (LOOP_VINFO_LOWER_BOUNDS): Likewise. + (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Check lower_bounds too. + * tree-data-ref.h (dr_with_seg_len): Add access_size and align + fields. Make seg_len the distance travelled, not including the + access size. + (dr_direction_indicator): Declare. + (dr_zero_step_indicator): Likewise. + (dr_known_forward_stride_p): Likewise. + * tree-data-ref.c: Include stringpool.h, tree-vrp.h and + tree-ssanames.h. + (runtime_alias_check_p): Allow runtime alias checks with + variable strides. + (operator ==): Compare access_size and align. + (prune_runtime_alias_test_list): Rework for new distinction between + the access_size and seg_len. + (create_intersect_range_checks_index): Likewise. Cope with polynomial + segment lengths. + (get_segment_min_max): New function. + (create_intersect_range_checks): Use it. + (dr_step_indicator): New function. + (dr_direction_indicator): Likewise. + (dr_zero_step_indicator): Likewise. + (dr_known_forward_stride_p): Likewise. + * tree-loop-distribution.c (data_ref_segment_size): Return + DR_STEP * (niters - 1). + (compute_alias_check_pairs): Update call to the dr_with_seg_len + constructor. + * tree-vect-data-refs.c (vect_check_nonzero_value): New function. + (vect_preserves_scalar_order_p): New function, split out from... + (vect_analyze_data_ref_dependence): ...here. Check for zero steps. + (vect_vfa_segment_size): Return DR_STEP * (length_factor - 1). + (vect_vfa_access_size): New function. + (vect_vfa_align): Likewise. + (vect_compile_time_alias): Take access_size_a and access_b arguments. + (dump_lower_bound): New function. 
+ (vect_check_lower_bound): Likewise. + (vect_small_gap_p): Likewise. + (vectorizable_with_step_bound_p): Likewise. + (vect_prune_runtime_alias_test_list): Ignore cross-iteration + dependencies if the vectorization factor is 1. Convert the checks + for nonzero steps into checks on the bounds of DR_STEP. Try using + a bounds check for variable steps if the minimum required step is + relatively small. Update calls to the dr_with_seg_len + constructor and to vect_compile_time_alias. + * tree-vect-loop-manip.c (vect_create_cond_for_lower_bounds): New + function. + (vect_loop_versioning): Call it. + * tree-vect-loop.c (vect_analyze_loop_2): Clear LOOP_VINFO_LOWER_BOUNDS + when retrying. + (vect_estimate_min_profitable_iters): Account for any bounds checks. + +2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + * doc/sourcebuild.texi (vect_scatter_store): Document. * optabs.def (scatter_store_optab, mask_scatter_store_optab): New optabs. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9955037..612489f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -2,6 +2,37 @@ Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> + * gcc.dg/vect/bb-slp-cond-1.c: Expect loop vectorization rather + than SLP vectorization. + * gcc.dg/vect/vect-alias-check-10.c: New test. + * gcc.dg/vect/vect-alias-check-11.c: Likewise. + * gcc.dg/vect/vect-alias-check-12.c: Likewise. + * gcc.dg/vect/vect-alias-check-8.c: Likewise. + * gcc.dg/vect/vect-alias-check-9.c: Likewise. + * gcc.target/aarch64/sve/strided_load_8.c: Likewise. + * gcc.target/aarch64/sve/var_stride_1.c: Likewise. + * gcc.target/aarch64/sve/var_stride_1.h: Likewise. + * gcc.target/aarch64/sve/var_stride_1_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_2.c: Likewise. + * gcc.target/aarch64/sve/var_stride_2_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_3.c: Likewise. 
+ * gcc.target/aarch64/sve/var_stride_3_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_4.c: Likewise. + * gcc.target/aarch64/sve/var_stride_4_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_5.c: Likewise. + * gcc.target/aarch64/sve/var_stride_5_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_6.c: Likewise. + * gcc.target/aarch64/sve/var_stride_6_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_7.c: Likewise. + * gcc.target/aarch64/sve/var_stride_7_run.c: Likewise. + * gcc.target/aarch64/sve/var_stride_8.c: Likewise. + * gcc.target/aarch64/sve/var_stride_8_run.c: Likewise. + * gfortran.dg/vect/vect-alias-check-1.F90: Likewise. + +2018-01-13 Richard Sandiford <richard.sandiford@linaro.org> + Alan Hayward <alan.hayward@arm.com> + David Sherwood <david.sherwood@arm.com> + * lib/target-supports.exp (check_effective_target_vect_scatter_store): New proc. * gcc.dg/vect/pr25413a.c: Expect both loops to be optimized on diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c index 2c4c36f..4bd286b 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-cond-1.c @@ -45,10 +45,6 @@ int main () return 0; } -/* Basic blocks of if-converted loops are vectorized from within the loop - vectorizer pass. In this case it is really a deficiency in loop - vectorization data dependence analysis that causes us to require - basic block vectorization in the first place. 
*/ - -/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump {(no need for alias check [^\n]* when VF is 1|no alias between [^\n]* when [^\n]* is outside \(-16, 16\))} "vect" { target vect_element_align } } } */ +/* { dg-final { scan-tree-dump-times "loop vectorized" 1 "vect" { target vect_element_align } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c new file mode 100644 index 0000000..d4eea87 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c @@ -0,0 +1,69 @@ +/* { dg-do run } */ + +#define N 87 +#define M 6 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *a, int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + a[i * step + 0] = a[i * step + 0] + 1; \ + a[i * step + 1] = a[i * step + 1] + 2; \ + a[i * step + 2] = a[i * step + 2] + 4; \ + a[i * step + 3] = a[i * step + 3] + 8; \ + } \ + } \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *a, int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + a[i * step + 0] = a[i * step + 0] + 1; \ + a[i * step + 1] = a[i * step + 1] + 2; \ + a[i * step + 2] = a[i * step + 2] + 4; \ + a[i * step + 3] = a[i * step + 3] + 8; \ + asm volatile (""); \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int j = -M; j <= M; ++j) \ + { \ + TYPE a[N * M], b[N * M]; \ + for (int i = 0; i < N * M; ++i) \ + a[i] = b[i] = TEST_VALUE (i); \ + int offset = (j < 0 ? 
N * M - 4 : 0); \ + test_##TYPE (a + offset, j); \ + ref_##TYPE (b + offset, j); \ + if (__builtin_memcmp (a, b, sizeof (a)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c new file mode 100644 index 0000000..601e17f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ + +#define N 87 +#define M 6 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE1(I) ((I) * 5 / 2) +#define TEST_VALUE2(I) ((I) * 11 / 5) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + } \ + } \ + \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + asm volatile (""); \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int j = -M; j <= M; ++j) \ + { \ + TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \ + for (int i = 0; i < N * M; ++i) \ + a1[i] = a2[i] = TEST_VALUE1 (i); \ + for (int i = 0; i < N; ++i) \ + b1[i] = b2[i] = TEST_VALUE2 (i); \ + int offset = (j < 0 ? 
N * M - 4 : 0); \ + test_##TYPE (a1 + offset, b1, j); \ + ref_##TYPE (a2 + offset, b2, j); \ + if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \ + __builtin_abort (); \ + if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-2, 2\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-3, 3\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* is outside \(-4, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]*\) >= 4} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-4, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-6, 6\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 2[)]* is outside \(-8, 8\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-8, 8\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-12, 12\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 4[)]* is outside \(-16, 16\)} "vect" { target { vect_int || vect_float } } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 4[)]* >= 16} "vect" { target 
{ vect_int || vect_float } } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-16, 16\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c new file mode 100644 index 0000000..a44c9bb --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c @@ -0,0 +1,99 @@ +/* { dg-do run } */ + +#define N 87 +#define M 7 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE1(I) ((I) * 5 / 2) +#define TEST_VALUE2(I) ((I) * 11 / 5) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + step = step & M; \ + for (int i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + } \ + } \ + \ + void __attribute__((noinline, noclone)) \ + ref_##TYPE (TYPE *restrict a, TYPE *restrict b, \ + int step) \ + { \ + for (unsigned short i = 0; i < N; ++i) \ + { \ + TYPE r1 = a[i * step + 0] += 1; \ + a[i * step + 1] += 2; \ + a[i * step + 2] += 4; \ + a[i * step + 3] += 8; \ + b[i] += r1; \ + asm volatile (""); \ + } \ + } + 
+#define DO_TEST(TYPE) \ + for (int j = 0; j <= M; ++j) \ + { \ + TYPE a1[N * M], a2[N * M], b1[N], b2[N]; \ + for (int i = 0; i < N * M; ++i) \ + a1[i] = a2[i] = TEST_VALUE1 (i); \ + for (int i = 0; i < N; ++i) \ + b1[i] = b2[i] = TEST_VALUE2 (i); \ + test_##TYPE (a1, b1, j); \ + ref_##TYPE (a2, b2, j); \ + if (__builtin_memcmp (a1, a2, sizeof (a1)) != 0) \ + __builtin_abort (); \ + if (__builtin_memcmp (b1, b2, sizeof (b1)) != 0) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 2\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 3\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* is outside \[0, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]*\) >= 4} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 4\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 6\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 2[)]* is outside \[0, 8\)} "vect" { target vect_int } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 2[)]* >= 8} "vect" { target vect_int } } } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 8\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 12\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { scan-tree-dump 
{no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 4[)]* is outside \[0, 16\)} "vect" { target { vect_int || vect_float } }} } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 4[)]* >= 16} "vect" { target { vect_int || vect_float } }} } */ + +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 16\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */ +/* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c new file mode 100644 index 0000000..5aeaf21 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ + +#define N 200 +#define DIST 32 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + TYPE a_##TYPE[N * 2]; \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (int x, int y) \ + { \ + for (int i = 0; i < N; ++i) \ + a_##TYPE[i + x] += a_##TYPE[i + y]; \ + } + +#define DO_TEST(TYPE) \ + for (int i = 0; i < DIST * 2; ++i) \ + { \ + for (int j = 0; j < N + DIST * 2; ++j) \ + a_##TYPE[j] = TEST_VALUE (j); \ + test_##TYPE (i, DIST); \ + for (int j = 0; j < N + DIST * 2; ++j) \ + { \ + TYPE expected; \ + if (j < i || j >= i + N) 
\ + expected = TEST_VALUE (j); \ + else if (i <= DIST) \ + expected = ((TYPE) TEST_VALUE (j) \ + + (TYPE) TEST_VALUE (j - i + DIST)); \ + else \ + expected = ((TYPE) TEST_VALUE (j) \ + + a_##TYPE[j - i + DIST]); \ + if (expected != a_##TYPE[j]) \ + __builtin_abort (); \ + } \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c new file mode 100644 index 0000000..9bc38af --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c @@ -0,0 +1,55 @@ +/* { dg-do run } */ + +#define N 200 +#define M 4 + +typedef signed char sc; +typedef unsigned char uc; +typedef signed short ss; +typedef unsigned short us; +typedef int si; +typedef unsigned int ui; +typedef signed long long sll; +typedef unsigned long long ull; + +#define FOR_EACH_TYPE(M) \ + M (sc) M (uc) \ + M (ss) M (us) \ + M (si) M (ui) \ + M (sll) M (ull) \ + M (float) M (double) + +#define TEST_VALUE(I) ((I) * 5 / 2) + +#define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ + test_##TYPE (TYPE *a, TYPE *b) \ + { \ + for (int i = 0; i < N; i += 2) \ + { \ + a[i + 0] = b[i + 0] + 2; \ + a[i + 1] = b[i + 1] + 3; \ + } \ + } + +#define DO_TEST(TYPE) \ + for (int j = 1; j < M; ++j) \ + { \ + TYPE a[N + M]; \ + for (int i = 0; i < N + M; ++i) \ + a[i] = TEST_VALUE (i); \ + test_##TYPE (a + j, a); \ + for (int i = 0; i < N; i += 2) \ + if (a[i + j] != (TYPE) (a[i] + 2) \ + || a[i + j + 1] != (TYPE) (a[i + 1] + 3)) \ + __builtin_abort (); \ + } + +FOR_EACH_TYPE (ADD_TEST) + +int +main (void) +{ + FOR_EACH_TYPE (DO_TEST) + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/strided_load_8.c b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_8.c new file mode 100644 index 0000000..cd61801 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/strided_load_8.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 
-ftree-vectorize" } */ + +void +foo (double *x, int m) +{ + for (int i = 0; i < 256; ++i) + x[i * m] += x[i * m]; +} + +/* { dg-final { scan-assembler-times {\tcbz\tw1,} 1 } } */ +/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, } 1 } } */ +/* { dg-final { scan-assembler-times {\tldr\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c new file mode 100644 index 0000000..68baba9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (VF-1)*4 rather than (257-1)*4. */ +/* { dg-final { scan-assembler-not {, 1024} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ +/* Two range checks and a check for n being zero. 
*/ +/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.h b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.h new file mode 100644 index 0000000..43b098f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.h @@ -0,0 +1,61 @@ +extern void abort (void) __attribute__ ((noreturn)); + +#define MARGIN 6 + +void __attribute__ ((weak, optimize ("no-tree-vectorize"))) +test (int n, int m, int offset) +{ + int abs_n = (n < 0 ? -n : n); + int abs_m = (m < 0 ? -m : m); + int max_i = (abs_n > abs_m ? abs_n : abs_m); + int abs_offset = (offset < 0 ? -offset : offset); + int size = MARGIN * 2 + max_i * SIZE + abs_offset; + TYPE *array = (TYPE *) __builtin_alloca (size * sizeof (TYPE)); + for (int i = 0; i < size; ++i) + array[i] = i; + int base_x = offset < 0 ? MARGIN - offset : MARGIN; + int base_y = offset < 0 ? MARGIN : MARGIN + offset; + int start_x = n < 0 ? base_x - n * (SIZE - 1) : base_x; + int start_y = m < 0 ? base_y - m * (SIZE - 1) : base_y; + f (&array[start_x], &array[start_y], n, m); + int j = 0; + int start = (n < 0 ? size - 1 : 0); + int end = (n < 0 ? -1 : size); + int inc = (n < 0 ? -1 : 1); + for (int i = start; i != end; i += inc) + { + if (j == SIZE || i != start_x + j * n) + { + if (array[i] != i) + abort (); + } + else if (n == 0) + { + TYPE sum = i; + for (; j < SIZE; j++) + { + int next_y = start_y + j * m; + if (n >= 0 ? next_y < i : next_y > i) + sum += array[next_y]; + else if (next_y == i) + sum += sum; + else + sum += next_y; + } + if (array[i] != sum) + abort (); + } + else + { + int next_y = start_y + j * m; + TYPE base = i; + if (n >= 0 ? 
next_y < i : next_y > i) + base += array[next_y]; + else + base += next_y; + if (array[i] != base) + abort (); + j += 1; + } + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1_run.c new file mode 100644 index 0000000..65bb9fd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_1.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = 0; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c new file mode 100644 index 0000000..112e84f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, unsigned short n, unsigned short m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (257-1)*4 rather than (VF-1)*4. */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x[0-9]+, x[0-9]+, lsl 10\n} 2 } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) 
*/ +/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2_run.c new file mode 100644 index 0000000..313136a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_2.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = 0; n < 10; ++n) + for (int m = 0; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c new file mode 100644 index 0000000..70792ff --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, int n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (VF-1)*4 rather than (257-1)*4. */ +/* { dg-final { scan-assembler-not {, 1024} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3_run.c new file mode 100644 index 0000000..e10e70d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_3.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c new file mode 100644 index 0000000..4bcdb5d --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE int +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, int n, int m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +/* Should multiply by (257-1)*4 rather than (VF-1)*4. */ +/* { dg-final { scan-assembler-times {\tlsl\tx[0-9]+, x[0-9]+, 10\n} 2 } } */ +/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ +/* { dg-final { scan-assembler {\tcmp\tw3, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4_run.c new file mode 100644 index 0000000..7b8645e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_4_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_4.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int m = -10; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c new file mode 100644 index 0000000..688f3be6 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE double +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * n]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */ +/* Should multiply by (VF-1)*8 rather than (257-1)*8. */ +/* { dg-final { scan-assembler-not {, 2048} } } */ +/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5_run.c new file mode 100644 index 0000000..3360d0e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_5.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, n, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6.c new file mode 100644 index 0000000..37e5d0f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE long +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. */ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 2 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 4 } } */ +/* Two range checks and a check for n being zero. (m being zero is OK.) 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6_run.c new file mode 100644 index 0000000..8efaf4b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_6_run.c @@ -0,0 +1,18 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_6.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int m = -10; m < 10; ++m) + for (int offset = -17; offset <= 17; ++offset) + { + test (n, m, offset); + test (n, m, offset + n * (SIZE - 1)); + } + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7.c new file mode 100644 index 0000000..65e2035 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE double +#define SIZE 257 + +void __attribute__ ((weak)) +f (TYPE *x, TYPE *y, long n, long m __attribute__((unused))) +{ + for (int i = 0; i < SIZE; ++i) + x[i * n] += y[i]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x1, 2048} 1 } } */ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks and a check for n being zero. 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7_run.c new file mode 100644 index 0000000..5795145 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_7_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_7.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (n, 1, offset); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8.c new file mode 100644 index 0000000..5224a5f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#define TYPE long +#define SIZE 257 + +void +f (TYPE *x, TYPE *y, long n __attribute__((unused)), long m) +{ + for (int i = 0; i < SIZE; ++i) + x[i] += y[i * m]; +} + +/* { dg-final { scan-assembler {\tld1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ +/* { dg-final { scan-assembler {\tldr\tx[0-9]+} } } */ +/* { dg-final { scan-assembler {\tstr\tx[0-9]+} } } */ +/* Should multiply by (257-1)*8 rather than (VF-1)*8. */ +/* { dg-final { scan-assembler-times {\tadd\tx[0-9]+, x0, 2048} 1 } } */ +/* { dg-final { scan-assembler-times {lsl\tx[0-9]+, x[0-9]+, 11} 1 } } */ +/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +/* Two range checks only; doesn't matter whether n is zero. 
*/ +/* { dg-final { scan-assembler {\tcmp\t} } } */ +/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8_run.c new file mode 100644 index 0000000..6d4b2a1 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_8_run.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { aarch64_sve_hw } } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "var_stride_8.c" +#include "var_stride_1.h" + +int +main (void) +{ + for (int n = -10; n < 10; ++n) + for (int offset = -33; offset <= 33; ++offset) + test (1, n, offset); + return 0; +} diff --git a/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 b/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 new file mode 100644 index 0000000..ea9ba85 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/vect/vect-alias-check-1.F90 @@ -0,0 +1,102 @@ +! { dg-do run } +! { dg-additional-options "-fno-inline" } + +#define N 200 + +#define TEST_VALUE(I) ((I) * 5 / 2) + +subroutine setup(a) + real :: a(N) + do i = 1, N + a(i) = TEST_VALUE(i) + end do +end subroutine + +subroutine check(a, x, gap) + real :: a(N), temp, x + integer :: gap + do i = 1, N - gap + temp = a(i + gap) + x + if (a(i) /= temp) call abort + end do + do i = N - gap + 1, N + temp = TEST_VALUE(i) + if (a(i) /= temp) call abort + end do +end subroutine + +subroutine testa(a, x, base, n) + real :: a(n), x + integer :: base, n + do i = n, 2, -1 + a(base + i - 1) = a(base + i) + x + end do +end subroutine testa + +subroutine testb(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 4, -1 + a(base + i - 3) = a(base + i) + x + end do +end subroutine testb + +subroutine testc(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 8, -1 + a(base + i - 7) = a(base + i) + x + end do +end subroutine testc + +subroutine testd(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 16, -1 + a(base + i - 15) = a(base + i) + 
x + end do +end subroutine testd + +subroutine teste(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 32, -1 + a(base + i - 31) = a(base + i) + x + end do +end subroutine teste + +subroutine testf(a, x, base, n) + real :: a(n), x + integer :: base + do i = n, 64, -1 + a(base + i - 63) = a(base + i) + x + end do +end subroutine testf + +program main + real :: a(N) + + call setup(a) + call testa(a, 91.0, 0, N) + call check(a, 91.0, 1) + + call setup(a) + call testb(a, 55.0, 0, N) + call check(a, 55.0, 3) + + call setup(a) + call testc(a, 72.0, 0, N) + call check(a, 72.0, 7) + + call setup(a) + call testd(a, 69.0, 0, N) + call check(a, 69.0, 15) + + call setup(a) + call teste(a, 44.0, 0, N) + call check(a, 44.0, 31) + + call setup(a) + call testf(a, 39.0, 0, N) + call check(a, 39.0, 63) +end program diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c index 3b15b5d..e39067d 100644 --- a/gcc/tree-data-ref.c +++ b/gcc/tree-data-ref.c @@ -95,6 +95,9 @@ along with GCC; see the file COPYING3. If not see #include "tree-affine.h" #include "params.h" #include "builtins.h" +#include "stringpool.h" +#include "tree-vrp.h" +#include "tree-ssanames.h" static struct datadep_stats { @@ -1305,18 +1308,6 @@ runtime_alias_check_p (ddr_p ddr, struct loop *loop, bool speed_p) return false; } - /* FORNOW: We don't support creating runtime alias tests for non-constant - step. 
*/ - if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST - || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST) - { - if (dump_enabled_p ()) - dump_printf (MSG_MISSED_OPTIMIZATION, - "runtime alias check not supported for non-constant " - "step\n"); - return false; - } - return true; } @@ -1331,11 +1322,13 @@ static bool operator == (const dr_with_seg_len& d1, const dr_with_seg_len& d2) { - return operand_equal_p (DR_BASE_ADDRESS (d1.dr), - DR_BASE_ADDRESS (d2.dr), 0) - && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 - && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 - && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0; + return (operand_equal_p (DR_BASE_ADDRESS (d1.dr), + DR_BASE_ADDRESS (d2.dr), 0) + && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0 + && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0 + && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0 + && known_eq (d1.access_size, d2.access_size) + && d1.align == d2.align); } /* Comparison function for sorting objects of dr_with_seg_len_pair_t @@ -1415,7 +1408,7 @@ comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) void prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, - poly_uint64 factor) + poly_uint64) { /* Sort the collected data ref pairs so that we can scan them once to combine all possible aliasing checks. */ @@ -1461,6 +1454,8 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, } poly_int64 init_a1, init_a2; + /* Only consider cases in which the distance between the initial + DR_A1 and the initial DR_A2 is known at compile time. */ if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr), DR_BASE_ADDRESS (dr_a2->dr), 0) || !operand_equal_p (DR_OFFSET (dr_a1->dr), @@ -1480,141 +1475,79 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs, std::swap (init_a1, init_a2); } - /* Only merge const step data references. 
*/ - poly_int64 step_a1, step_a2; - if (!poly_int_tree_p (DR_STEP (dr_a1->dr), &step_a1) - || !poly_int_tree_p (DR_STEP (dr_a2->dr), &step_a2)) - continue; + /* Work out what the segment length would be if we did combine + DR_A1 and DR_A2: - bool neg_step = maybe_lt (step_a1, 0) || maybe_lt (step_a2, 0); + - If DR_A1 and DR_A2 have equal lengths, that length is + also the combined length. - /* DR_A1 and DR_A2 must go in the same direction. */ - if (neg_step && (maybe_gt (step_a1, 0) || maybe_gt (step_a2, 0))) - continue; + - If DR_A1 and DR_A2 both have negative "lengths", the combined + length is the lower bound on those lengths. - poly_uint64 seg_len_a1 = 0, seg_len_a2 = 0; - bool const_seg_len_a1 = poly_int_tree_p (dr_a1->seg_len, - &seg_len_a1); - bool const_seg_len_a2 = poly_int_tree_p (dr_a2->seg_len, - &seg_len_a2); - - /* We need to compute merged segment length at compilation time for - dr_a1 and dr_a2, which is impossible if either one has non-const - segment length. */ - if ((!const_seg_len_a1 || !const_seg_len_a2) - && maybe_ne (step_a1, step_a2)) - continue; + - If DR_A1 and DR_A2 both have positive lengths, the combined + length is the upper bound on those lengths. - bool do_remove = false; - poly_uint64 diff = init_a2 - init_a1; - poly_uint64 min_seg_len_b; - tree new_seg_len; + Other cases are unlikely to give a useful combination. - if (!poly_int_tree_p (dr_b1->seg_len, &min_seg_len_b)) + The lengths both have sizetype, so the sign is taken from + the step instead. */ + if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0)) { - tree step_b = DR_STEP (dr_b1->dr); - if (!tree_fits_shwi_p (step_b)) + poly_uint64 seg_len_a1, seg_len_a2; + if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1) + || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2)) continue; - min_seg_len_b = factor * abs_hwi (tree_to_shwi (step_b)); - } - - /* Now we try to merge alias check dr_a1 & dr_b and dr_a2 & dr_b. 
- - Case A: - check if the following condition is satisfied: - - DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B - where DIFF = DR_A2_INIT - DR_A1_INIT. However, - SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we - have to make a best estimation. We can get the minimum value - of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B, - then either of the following two conditions can guarantee the - one above: + tree indicator_a = dr_direction_indicator (dr_a1->dr); + if (TREE_CODE (indicator_a) != INTEGER_CST) + continue; - 1: DIFF <= MIN_SEG_LEN_B - 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B - Because DIFF - SEGMENT_LENGTH_A is done in sizetype, we need - to take care of wrapping behavior in it. + tree indicator_b = dr_direction_indicator (dr_a2->dr); + if (TREE_CODE (indicator_b) != INTEGER_CST) + continue; - Case B: - If the left segment does not extend beyond the start of the - right segment the new segment length is that of the right - plus the segment distance. The condition is like: + int sign_a = tree_int_cst_sgn (indicator_a); + int sign_b = tree_int_cst_sgn (indicator_b); - DIFF >= SEGMENT_LENGTH_A ;SEGMENT_LENGTH_A is a constant. + poly_uint64 new_seg_len; + if (sign_a <= 0 && sign_b <= 0) + new_seg_len = lower_bound (seg_len_a1, seg_len_a2); + else if (sign_a >= 0 && sign_b >= 0) + new_seg_len = upper_bound (seg_len_a1, seg_len_a2); + else + continue; - Note 1: Case A.2 and B combined together effectively merges every - dr_a1 & dr_b and dr_a2 & dr_b when SEGMENT_LENGTH_A is const. + dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), + new_seg_len); + dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); + } - Note 2: Above description is based on positive DR_STEP, we need to - take care of negative DR_STEP for wrapping behavior. See PR80815 - for more information. */ - if (neg_step) - { - /* Adjust diff according to access size of both references. 
*/ - diff += tree_to_poly_uint64 - (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a2->dr)))); - diff -= tree_to_poly_uint64 - (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a1->dr)))); - /* Case A.1. */ - if (known_le (diff, min_seg_len_b) - /* Case A.2 and B combined. */ - || const_seg_len_a2) - { - if (const_seg_len_a1 || const_seg_len_a2) - new_seg_len - = build_int_cstu (sizetype, - lower_bound (seg_len_a1 - diff, - seg_len_a2)); - else - new_seg_len - = size_binop (MINUS_EXPR, dr_a2->seg_len, - build_int_cstu (sizetype, diff)); + /* This is always positive due to the swap above. */ + poly_uint64 diff = init_a2 - init_a1; - dr_a2->seg_len = new_seg_len; - do_remove = true; - } - } - else + /* The new check will start at DR_A1. Make sure that its access + size encompasses the initial DR_A2. */ + if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size)) { - /* Case A.1. */ - if (known_le (diff, min_seg_len_b) - /* Case A.2 and B combined. */ - || const_seg_len_a1) - { - if (const_seg_len_a1 && const_seg_len_a2) - new_seg_len - = build_int_cstu (sizetype, - upper_bound (seg_len_a2 + diff, - seg_len_a1)); - else - new_seg_len - = size_binop (PLUS_EXPR, dr_a2->seg_len, - build_int_cstu (sizetype, diff)); - - dr_a1->seg_len = new_seg_len; - do_remove = true; - } + dr_a1->access_size = upper_bound (dr_a1->access_size, + diff + dr_a2->access_size); + unsigned int new_align = known_alignment (dr_a1->access_size); + dr_a1->align = MIN (dr_a1->align, new_align); } - - if (do_remove) + if (dump_enabled_p ()) { - if (dump_enabled_p ()) - { - dump_printf (MSG_NOTE, "merging ranges for "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr)); - dump_printf (MSG_NOTE, ", "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr)); - dump_printf (MSG_NOTE, " and "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr)); - dump_printf (MSG_NOTE, ", "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr)); - dump_printf (MSG_NOTE, "\n"); - } - 
alias_pairs->ordered_remove (neg_step ? i - 1 : i); - i--; + dump_printf (MSG_NOTE, "merging ranges for "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a1->dr)); + dump_printf (MSG_NOTE, ", "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b1->dr)); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a2->dr)); + dump_printf (MSG_NOTE, ", "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b2->dr)); + dump_printf (MSG_NOTE, "\n"); } + alias_pairs->ordered_remove (i); + i--; } } } @@ -1654,7 +1587,9 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) return false; - if (!tree_fits_uhwi_p (dr_a.seg_len) || !tree_fits_uhwi_p (dr_b.seg_len)) + poly_uint64 seg_len1, seg_len2; + if (!poly_int_tree_p (dr_a.seg_len, &seg_len1) + || !poly_int_tree_p (dr_b.seg_len, &seg_len2)) return false; if (!tree_fits_shwi_p (DR_STEP (dr_a.dr))) @@ -1669,19 +1604,42 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST); bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0; - unsigned HOST_WIDE_INT abs_step - = absu_hwi (tree_to_shwi (DR_STEP (dr_a.dr))); + unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr)); + if (neg_step) + { + abs_step = -abs_step; + seg_len1 = -seg_len1; + seg_len2 = -seg_len2; + } + else + { + /* Include the access size in the length, so that we only have one + tree addition below. */ + seg_len1 += dr_a.access_size; + seg_len2 += dr_b.access_size; + } - unsigned HOST_WIDE_INT seg_len1 = tree_to_uhwi (dr_a.seg_len); - unsigned HOST_WIDE_INT seg_len2 = tree_to_uhwi (dr_b.seg_len); /* Infer the number of iterations with which the memory segment is accessed by DR. 
In other words, alias is checked if memory segment accessed by DR_A in some iterations intersects with memory segment accessed by DR_B in the same number of iterations. Note segment length is a linear function of number of iterations with DR_STEP as the coefficient. */ - unsigned HOST_WIDE_INT niter_len1 = (seg_len1 + abs_step - 1) / abs_step; - unsigned HOST_WIDE_INT niter_len2 = (seg_len2 + abs_step - 1) / abs_step; + poly_uint64 niter_len1, niter_len2; + if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1) + || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) + return false; + + poly_uint64 niter_access1 = 0, niter_access2 = 0; + if (neg_step) + { + /* Divide each access size by the byte step, rounding up. */ + if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, + abs_step, &niter_access1) + || !can_div_trunc_p (dr_b.access_size + abs_step - 1, + abs_step, &niter_access2)) + return false; + } unsigned int i; for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) @@ -1732,12 +1690,22 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, /* Adjust ranges for negative step. */ if (neg_step) { - min1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), max1, idx_step); - max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (min1), - CHREC_LEFT (access1), idx_step); - min2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), max2, idx_step); - max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (min2), - CHREC_LEFT (access2), idx_step); + /* IDX_LEN1 and IDX_LEN2 are negative in this case. */ + std::swap (min1, max1); + std::swap (min2, max2); + + /* As with the lengths just calculated, we've measured the access + sizes in iterations, so multiply them by the index step. 
*/ + tree idx_access1 + = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, + build_int_cst (TREE_TYPE (min1), niter_access1)); + tree idx_access2 + = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, + build_int_cst (TREE_TYPE (min2), niter_access2)); + + /* MINUS_EXPR because the above values are negative. */ + max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1); + max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2); } tree part_cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, @@ -1752,6 +1720,89 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, return true; } +/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for + every address ADDR accessed by D: + + *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT + + In this case, every element accessed by D is aligned to at least + ALIGN bytes. + + If ALIGN is zero then instead set *SEG_MAX_OUT so that: + + *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT. */ + +static void +get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out, + tree *seg_max_out, HOST_WIDE_INT align) +{ + /* Each access has the following pattern: + + <- |seg_len| -> + <--- A: -ve step ---> + +-----+-------+-----+-------+-----+ + | n-1 | ,.... | 0 | ..... | n-1 | + +-----+-------+-----+-------+-----+ + <--- B: +ve step ---> + <- |seg_len| -> + | + base address + + where "n" is the number of scalar iterations covered by the segment. + (This should be VF for a particular pair if we know that both steps + are the same, otherwise it will be the full number of scalar loop + iterations.) + + A is the range of bytes accessed when the step is negative, + B is the range when the step is positive. + + If the access size is "access_size" bytes, the lowest addressed byte is: + + base + (step < 0 ? seg_len : 0) [LB] + + and the highest addressed byte is always below: + + base + (step < 0 ? 
0 : seg_len) + access_size [UB] + + Thus: + + LB <= ADDR < UB + + If ALIGN is nonzero, all three values are aligned to at least ALIGN + bytes, so: + + LB <= ADDR <= UB - ALIGN + + where "- ALIGN" folds naturally with the "+ access_size" and often + cancels it out. + + We don't try to simplify LB and UB beyond this (e.g. by using + MIN and MAX based on whether seg_len rather than the stride is + negative) because it is possible for the absolute size of the + segment to overflow the range of a ssize_t. + + Keeping the pointer_plus outside of the cond_expr should allow + the cond_exprs to be shared with other alias checks. */ + tree indicator = dr_direction_indicator (d.dr); + tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, + fold_convert (ssizetype, indicator), + ssize_int (0)); + tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr), + DR_OFFSET (d.dr)); + addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr)); + tree seg_len = fold_convert (sizetype, d.seg_len); + + tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step, + seg_len, size_zero_node); + tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step, + size_zero_node, seg_len); + max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach, + size_int (d.access_size - align)); + + *seg_min_out = fold_build_pointer_plus (addr_base, min_reach); + *seg_max_out = fold_build_pointer_plus (addr_base, max_reach); +} + /* Given two data references and segment lengths described by DR_A and DR_B, create expression checking if the two addresses ranges intersect with each other: @@ -1768,43 +1819,48 @@ create_intersect_range_checks (struct loop *loop, tree *cond_expr, if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b)) return; - tree segment_length_a = dr_a.seg_len; - tree segment_length_b = dr_b.seg_len; - tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr); - tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr); - tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr); 
- - offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a), - offset_a, DR_INIT (dr_a.dr)); - offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b), - offset_b, DR_INIT (dr_b.dr)); - addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a); - addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b); - - tree seg_a_min = addr_base_a; - tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a); - /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT - bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of - [a, a+12) */ - if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0) + unsigned HOST_WIDE_INT min_align; + tree_code cmp_code; + if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST + && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST) { - tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr))); - seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size); - seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size); + /* In this case adding access_size to seg_len is likely to give + a simple X * step, where X is either the number of scalar + iterations or the vectorization factor. We're better off + keeping that, rather than subtracting an alignment from it. + + In this case the maximum values are exclusive and so there is + no alias if the maximum of one segment equals the minimum + of another. */ + min_align = 0; + cmp_code = LE_EXPR; } - - tree seg_b_min = addr_base_b; - tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b); - if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0) + else { - tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr))); - seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size); - seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size); + /* Calculate the minimum alignment shared by all four pointers, + then arrange for this alignment to be subtracted from the + exclusive maximum values to get inclusive maximum values. 
+ This "- min_align" is cumulative with a "+ access_size" + in the calculation of the maximum values. In the best + (and common) case, the two cancel each other out, leaving + us with an inclusive bound based only on seg_len. In the + worst case we're simply adding a smaller number than before. + + Because the maximum values are inclusive, there is an alias + if the maximum value of one segment is equal to the minimum + value of the other. */ + min_align = MIN (dr_a.align, dr_b.align); + cmp_code = LT_EXPR; } + + tree seg_a_min, seg_a_max, seg_b_min, seg_b_max; + get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align); + get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align); + *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, - fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min), - fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min)); + fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min), + fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); } /* Create a conditional expression that represents the run-time checks for @@ -5271,3 +5327,90 @@ free_data_refs (vec<data_reference_p> datarefs) free_data_ref (dr); datarefs.release (); } + +/* Common routine implementing both dr_direction_indicator and + dr_zero_step_indicator. Return USEFUL_MIN if the indicator is known + to be >= USEFUL_MIN and -1 if the indicator is known to be negative. + Return the step as the indicator otherwise. */ + +static tree +dr_step_indicator (struct data_reference *dr, int useful_min) +{ + tree step = DR_STEP (dr); + STRIP_NOPS (step); + /* Look for cases where the step is scaled by a positive constant + integer, which will often be the access size. If the multiplication + doesn't change the sign (due to overflow effects) then we can + test the unscaled value instead. 
*/ + if (TREE_CODE (step) == MULT_EXPR + && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST + && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0) + { + tree factor = TREE_OPERAND (step, 1); + step = TREE_OPERAND (step, 0); + + /* Strip widening and truncating conversions as well as nops. */ + if (CONVERT_EXPR_P (step) + && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0)))) + step = TREE_OPERAND (step, 0); + tree type = TREE_TYPE (step); + + /* Get the range of step values that would not cause overflow. */ + widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype)) + / wi::to_widest (factor)); + widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype)) + / wi::to_widest (factor)); + + /* Get the range of values that the unconverted step actually has. */ + wide_int step_min, step_max; + if (TREE_CODE (step) != SSA_NAME + || get_range_info (step, &step_min, &step_max) != VR_RANGE) + { + step_min = wi::to_wide (TYPE_MIN_VALUE (type)); + step_max = wi::to_wide (TYPE_MAX_VALUE (type)); + } + + /* Check whether the unconverted step has an acceptable range. */ + signop sgn = TYPE_SIGN (type); + if (wi::les_p (minv, widest_int::from (step_min, sgn)) + && wi::ges_p (maxv, widest_int::from (step_max, sgn))) + { + if (wi::ge_p (step_min, useful_min, sgn)) + return ssize_int (useful_min); + else if (wi::lt_p (step_max, 0, sgn)) + return ssize_int (-1); + else + return fold_convert (ssizetype, step); + } + } + return DR_STEP (dr); +} + +/* Return a value that is negative iff DR has a negative step. */ + +tree +dr_direction_indicator (struct data_reference *dr) +{ + return dr_step_indicator (dr, 0); +} + +/* Return a value that is zero iff DR has a zero step. */ + +tree +dr_zero_step_indicator (struct data_reference *dr) +{ + return dr_step_indicator (dr, 1); +} + +/* Return true if DR is known to have a nonnegative (but possibly zero) + step. 
*/ + +bool +dr_known_forward_stride_p (struct data_reference *dr) +{ + tree indicator = dr_direction_indicator (dr); + tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node, + fold_convert (ssizetype, indicator), + ssize_int (0)); + return neg_step_val && integer_zerop (neg_step_val); +} diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h index 6df38a5..63094a7 100644 --- a/gcc/tree-data-ref.h +++ b/gcc/tree-data-ref.h @@ -203,11 +203,20 @@ typedef struct data_reference *data_reference_p; struct dr_with_seg_len { - dr_with_seg_len (data_reference_p d, tree len) - : dr (d), seg_len (len) {} + dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size, + unsigned int a) + : dr (d), seg_len (len), access_size (size), align (a) {} data_reference_p dr; + /* The offset of the last access that needs to be checked minus + the offset of the first. */ tree seg_len; + /* A value that, when added to abs (SEG_LEN), gives the total number of + bytes in the segment. */ + poly_uint64 access_size; + /* The minimum common alignment of DR's start address, SEG_LEN and + ACCESS_SIZE. */ + unsigned int align; }; /* This struct contains two dr_with_seg_len objects with aliasing data @@ -475,6 +484,10 @@ extern void prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *, poly_uint64); extern void create_runtime_alias_checks (struct loop *, vec<dr_with_seg_len_pair_t> *, tree*); +extern tree dr_direction_indicator (struct data_reference *); +extern tree dr_zero_step_indicator (struct data_reference *); +extern bool dr_known_forward_stride_p (struct data_reference *); + /* Return true when the base objects of data references A and B are the same memory object. 
*/ diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index eca8dd2..a3d76e4 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -2330,16 +2330,12 @@ break_alias_scc_partitions (struct graph *rdg, static tree data_ref_segment_size (struct data_reference *dr, tree niters) { - tree segment_length; - - if (integer_zerop (DR_STEP (dr))) - segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); - else - segment_length = size_binop (MULT_EXPR, - fold_convert (sizetype, DR_STEP (dr)), - fold_convert (sizetype, niters)); - - return segment_length; + niters = size_binop (MINUS_EXPR, + fold_convert (sizetype, niters), + size_one_node); + return size_binop (MULT_EXPR, + fold_convert (sizetype, DR_STEP (dr)), + fold_convert (sizetype, niters)); } /* Return true if LOOP's latch is dominated by statement for data reference @@ -2394,9 +2390,16 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs, else seg_length_b = data_ref_segment_size (dr_b, niters); + unsigned HOST_WIDE_INT access_size_a + = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a)))); + unsigned HOST_WIDE_INT access_size_b + = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b)))); + unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a))); + unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b))); + dr_with_seg_len_pair_t dr_with_seg_len_pair - (dr_with_seg_len (dr_a, seg_length_a), - dr_with_seg_len (dr_b, seg_length_b)); + (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a), + dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b)); /* Canonicalize pairs by sorting the two DR members. 
*/ if (comp_res > 0) diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index c6bfe45..684b7c5 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -169,6 +169,50 @@ vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo) return true; } +/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */ + +static void +vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value) +{ + vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo); + for (unsigned int i = 0; i < checks.length(); ++i) + if (checks[i] == value) + return; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "need run-time check that "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, value); + dump_printf (MSG_NOTE, " is nonzero\n"); + } + LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value); +} + +/* Return true if we know that the order of vectorized STMT_A and + vectorized STMT_B will be the same as the order of STMT_A and STMT_B. + At least one of the statements is a write. */ + +static bool +vect_preserves_scalar_order_p (gimple *stmt_a, gimple *stmt_b) +{ + stmt_vec_info stmtinfo_a = vinfo_for_stmt (stmt_a); + stmt_vec_info stmtinfo_b = vinfo_for_stmt (stmt_b); + + /* Single statements are always kept in their original order. */ + if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) + && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) + return true; + + /* STMT_A and STMT_B belong to overlapping groups. All loads in a + group are emitted at the position of the first scalar load and all + stores in a group are emitted at the position of the last scalar store. + Thus writes will happen no earlier than their current position + (but could happen later) while reads will happen no later than their + current position (but could happen earlier). Reordering is therefore + only possible if the first access is a write. 
*/ + gimple *earlier_stmt = get_earlier_stmt (stmt_a, stmt_b); + return !DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))); +} /* A subroutine of vect_analyze_data_ref_dependence. Handle DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence @@ -414,22 +458,27 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr, ... = a[i]; a[i+1] = ...; where loads from the group interleave with the store. */ - if (STMT_VINFO_GROUPED_ACCESS (stmtinfo_a) - || STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)) + if (!vect_preserves_scalar_order_p (DR_STMT (dra), DR_STMT (drb))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "READ_WRITE dependence in interleaving.\n"); + return true; + } + + if (!loop->force_vectorize) { - gimple *earlier_stmt; - earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb)); - if (DR_IS_WRITE - (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt)))) + tree indicator = dr_zero_step_indicator (dra); + if (TREE_CODE (indicator) != INTEGER_CST) + vect_check_nonzero_value (loop_vinfo, indicator); + else if (integer_zerop (indicator)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "READ_WRITE dependence in interleaving." - "\n"); + "access also has a zero step\n"); return true; } } - continue; } @@ -3030,38 +3079,57 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) /* Function vect_vfa_segment_size. - Create an expression that computes the size of segment - that will be accessed for a data reference. The functions takes into - account that realignment loads may access one more vector. - Input: DR: The data reference. LENGTH_FACTOR: segment length to consider. - Return an expression whose value is the size of segment which will be - accessed by DR. */ + Return a value suitable for the dr_with_seg_len::seg_len field. + This is the "distance travelled" by the pointer from the first + iteration in the segment to the last. 
Note that it does not include + the size of the access; in effect it only describes the first byte. */ static tree vect_vfa_segment_size (struct data_reference *dr, tree length_factor) { - tree segment_length; + length_factor = size_binop (MINUS_EXPR, + fold_convert (sizetype, length_factor), + size_one_node); + return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr)), + length_factor); +} - if (integer_zerop (DR_STEP (dr))) - segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); - else - segment_length = size_binop (MULT_EXPR, - fold_convert (sizetype, DR_STEP (dr)), - fold_convert (sizetype, length_factor)); +/* Return a value that, when added to abs (vect_vfa_segment_size (dr)), + gives the worst-case number of bytes covered by the segment. */ - if (vect_supportable_dr_alignment (dr, false) - == dr_explicit_realign_optimized) +static unsigned HOST_WIDE_INT +vect_vfa_access_size (data_reference *dr) +{ + stmt_vec_info stmt_vinfo = vinfo_for_stmt (DR_STMT (dr)); + tree ref_type = TREE_TYPE (DR_REF (dr)); + unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type)); + unsigned HOST_WIDE_INT access_size = ref_size; + if (GROUP_FIRST_ELEMENT (stmt_vinfo)) { - tree vector_size = TYPE_SIZE_UNIT - (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)))); - - segment_length = size_binop (PLUS_EXPR, segment_length, vector_size); + gcc_assert (GROUP_FIRST_ELEMENT (stmt_vinfo) == DR_STMT (dr)); + access_size *= GROUP_SIZE (stmt_vinfo) - GROUP_GAP (stmt_vinfo); + } + if (STMT_VINFO_VEC_STMT (stmt_vinfo) + && (vect_supportable_dr_alignment (dr, false) + == dr_explicit_realign_optimized)) + { + /* We might access a full vector's worth. */ + tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size; } - return segment_length; + return access_size; +} + +/* Get the minimum alignment for all the scalar accesses that DR describes. 
*/ + +static unsigned int +vect_vfa_align (const data_reference *dr) +{ + return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr))); } /* Function vect_no_alias_p. @@ -3069,13 +3137,15 @@ vect_vfa_segment_size (struct data_reference *dr, tree length_factor) Given data references A and B with equal base and offset, see whether the alias relation can be decided at compilation time. Return 1 if it can and the references alias, 0 if it can and the references do - not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A - and SEGMENT_LENGTH_B are the memory lengths accessed by A and B - respectively. */ + not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A, + SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent + of dr_with_seg_len::{seg_len,access_size} for A and B. */ static int vect_compile_time_alias (struct data_reference *a, struct data_reference *b, - tree segment_length_a, tree segment_length_b) + tree segment_length_a, tree segment_length_b, + unsigned HOST_WIDE_INT access_size_a, + unsigned HOST_WIDE_INT access_size_b) { poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a)); poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b)); @@ -3088,18 +3158,21 @@ vect_compile_time_alias (struct data_reference *a, struct data_reference *b, if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0) { const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi (); - offset_a = (offset_a + vect_get_scalar_dr_size (a)) - const_length_a; + offset_a = (offset_a + access_size_a) - const_length_a; } else const_length_a = tree_to_poly_uint64 (segment_length_a); if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0) { const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi (); - offset_b = (offset_b + vect_get_scalar_dr_size (b)) - const_length_b; + offset_b = (offset_b + access_size_b) - const_length_b; } else const_length_b = tree_to_poly_uint64 (segment_length_b); + const_length_a += access_size_a; + 
const_length_b += access_size_b; + if (ranges_known_overlap_p (offset_a, const_length_a, offset_b, const_length_b)) return 1; @@ -3149,6 +3222,108 @@ dependence_distance_ge_vf (data_dependence_relation *ddr, return true; } +/* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */ + +static void +dump_lower_bound (int dump_kind, const vec_lower_bound &lower_bound) +{ + dump_printf (dump_kind, "%s (", lower_bound.unsigned_p ? "unsigned" : "abs"); + dump_generic_expr (dump_kind, TDF_SLIM, lower_bound.expr); + dump_printf (dump_kind, ") >= "); + dump_dec (dump_kind, lower_bound.min_value); +} + +/* Record that the vectorized loop requires the vec_lower_bound described + by EXPR, UNSIGNED_P and MIN_VALUE. */ + +static void +vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p, + poly_uint64 min_value) +{ + vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); + for (unsigned int i = 0; i < lower_bounds.length (); ++i) + if (operand_equal_p (lower_bounds[i].expr, expr, 0)) + { + unsigned_p &= lower_bounds[i].unsigned_p; + min_value = upper_bound (lower_bounds[i].min_value, min_value); + if (lower_bounds[i].unsigned_p != unsigned_p + || maybe_lt (lower_bounds[i].min_value, min_value)) + { + lower_bounds[i].unsigned_p = unsigned_p; + lower_bounds[i].min_value = min_value; + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "updating run-time check to "); + dump_lower_bound (MSG_NOTE, lower_bounds[i]); + dump_printf (MSG_NOTE, "\n"); + } + } + return; + } + + vec_lower_bound lower_bound (expr, unsigned_p, min_value); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that "); + dump_lower_bound (MSG_NOTE, lower_bound); + dump_printf (MSG_NOTE, "\n"); + } + LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound); +} + +/* Return true if it's unlikely that the step of the vectorized form of DR + will span fewer than GAP bytes. 
*/ + +static bool +vect_small_gap_p (loop_vec_info loop_vinfo, data_reference *dr, poly_int64 gap) +{ + stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr)); + HOST_WIDE_INT count + = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); + if (GROUP_FIRST_ELEMENT (stmt_info)) + count *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info))); + return estimated_poly_value (gap) <= count * vect_get_scalar_dr_size (dr); +} + +/* Return true if we know that there is no alias between DR_A and DR_B + when abs (DR_STEP (DR_A)) >= N for some N. When returning true, set + *LOWER_BOUND_OUT to this N. */ + +static bool +vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b, + poly_uint64 *lower_bound_out) +{ + /* Check that there is a constant gap of known sign between DR_A + and DR_B. */ + poly_int64 init_a, init_b; + if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0) + || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0) + || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0) + || !poly_int_tree_p (DR_INIT (dr_a), &init_a) + || !poly_int_tree_p (DR_INIT (dr_b), &init_b) + || !ordered_p (init_a, init_b)) + return false; + + /* Sort DR_A and DR_B by the address they access. */ + if (maybe_lt (init_b, init_a)) + { + std::swap (init_a, init_b); + std::swap (dr_a, dr_b); + } + + /* If the two accesses could be dependent within a scalar iteration, + make sure that we'd retain their order. */ + if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_a), init_b) + && !vect_preserves_scalar_order_p (DR_STMT (dr_a), DR_STMT (dr_b))) + return false; + + /* There is no alias if abs (DR_STEP) is greater than or equal to + the bytes spanned by the combination of the two accesses. */ + *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_b) - init_a; + return true; +} + /* Function vect_prune_runtime_alias_test_list. Prune a list of ddrs to be tested at run-time by versioning for alias. 
@@ -3178,6 +3353,19 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) dump_printf_loc (MSG_NOTE, vect_location, "=== vect_prune_runtime_alias_test_list ===\n"); + /* Step values are irrelevant for aliasing if the number of vector + iterations is equal to the number of scalar iterations (which can + happen for fully-SLP loops). */ + bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U); + + if (!ignore_step_p) + { + /* Convert the checks for nonzero steps into bound tests. */ + tree value; + FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value) + vect_check_lower_bound (loop_vinfo, value, true, 1); + } + if (may_alias_ddrs.is_empty ()) return true; @@ -3191,9 +3379,12 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) { int comp_res; + poly_uint64 lower_bound; struct data_reference *dr_a, *dr_b; gimple *dr_group_first_a, *dr_group_first_b; tree segment_length_a, segment_length_b; + unsigned HOST_WIDE_INT access_size_a, access_size_b; + unsigned int align_a, align_b; gimple *stmt_a, *stmt_b; /* Ignore the alias if the VF we chose ended up being no greater @@ -3221,6 +3412,64 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) dr_a = DDR_A (ddr); stmt_a = DR_STMT (DDR_A (ddr)); + + dr_b = DDR_B (ddr); + stmt_b = DR_STMT (DDR_B (ddr)); + + /* Skip the pair if inter-iteration dependencies are irrelevant + and intra-iteration dependencies are guaranteed to be honored. 
*/ + if (ignore_step_p + && (vect_preserves_scalar_order_p (stmt_a, stmt_b) + || vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound))) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "no need for alias check between "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a)); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b)); + dump_printf (MSG_NOTE, " when VF is 1\n"); + } + continue; + } + + /* See whether we can handle the alias using a bounds check on + the step, and whether that's likely to be the best approach. + (It might not be, for example, if the minimum step is much larger + than the number of bytes handled by one vector iteration.) */ + if (!ignore_step_p + && TREE_CODE (DR_STEP (dr_a)) != INTEGER_CST + && vectorizable_with_step_bound_p (dr_a, dr_b, &lower_bound) + && (vect_small_gap_p (loop_vinfo, dr_a, lower_bound) + || vect_small_gap_p (loop_vinfo, dr_b, lower_bound))) + { + bool unsigned_p = dr_known_forward_stride_p (dr_a); + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "no alias between "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a)); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b)); + dump_printf (MSG_NOTE, " when the step "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_STEP (dr_a)); + dump_printf (MSG_NOTE, " is outside "); + if (unsigned_p) + dump_printf (MSG_NOTE, "[0"); + else + { + dump_printf (MSG_NOTE, "("); + dump_dec (MSG_NOTE, poly_int64 (-lower_bound)); + } + dump_printf (MSG_NOTE, ", "); + dump_dec (MSG_NOTE, lower_bound); + dump_printf (MSG_NOTE, ")\n"); + } + vect_check_lower_bound (loop_vinfo, DR_STEP (dr_a), unsigned_p, + lower_bound); + continue; + } + dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a)); if (dr_group_first_a) { @@ -3228,8 +3477,6 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt 
(stmt_a)); } - dr_b = DDR_B (ddr); - stmt_b = DR_STMT (DDR_B (ddr)); dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b)); if (dr_group_first_b) { @@ -3237,12 +3484,24 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b)); } - if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)) - length_factor = scalar_loop_iters; + if (ignore_step_p) + { + segment_length_a = size_zero_node; + segment_length_b = size_zero_node; + } else - length_factor = size_int (vect_factor); - segment_length_a = vect_vfa_segment_size (dr_a, length_factor); - segment_length_b = vect_vfa_segment_size (dr_b, length_factor); + { + if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)) + length_factor = scalar_loop_iters; + else + length_factor = size_int (vect_factor); + segment_length_a = vect_vfa_segment_size (dr_a, length_factor); + segment_length_b = vect_vfa_segment_size (dr_b, length_factor); + } + access_size_a = vect_vfa_access_size (dr_a); + access_size_b = vect_vfa_access_size (dr_b); + align_a = vect_vfa_align (dr_a); + align_b = vect_vfa_align (dr_b); comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)); @@ -3259,7 +3518,22 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) { int res = vect_compile_time_alias (dr_a, dr_b, segment_length_a, - segment_length_b); + segment_length_b, + access_size_a, + access_size_b); + if (res >= 0 && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "can tell at compile time that "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a)); + dump_printf (MSG_NOTE, " and "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b)); + if (res == 0) + dump_printf (MSG_NOTE, " do not alias\n"); + else + dump_printf (MSG_NOTE, " alias\n"); + } + if (res == 0) continue; @@ -3273,8 +3547,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) } dr_with_seg_len_pair_t dr_with_seg_len_pair - (dr_with_seg_len 
(dr_a, segment_length_a), - dr_with_seg_len (dr_b, segment_length_b)); + (dr_with_seg_len (dr_a, segment_length_a, access_size_a, align_a), + dr_with_seg_len (dr_b, segment_length_b, access_size_b, align_b)); /* Canonicalize pairs by sorting the two DR members. */ if (comp_res > 0) @@ -3287,6 +3561,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) unsigned int count = (comp_alias_ddrs.length () + check_unequal_addrs.length ()); + dump_printf_loc (MSG_NOTE, vect_location, "improved number of alias checks from %d to %d\n", may_alias_ddrs.length (), count); diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index a2b4989..53684e5 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2875,6 +2875,31 @@ vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr) } } +/* Create an expression that is true when all lower-bound conditions for + the vectorized loop are met. Chain this condition with *COND_EXPR. */ + +static void +vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr) +{ + vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo); + for (unsigned int i = 0; i < lower_bounds.length (); ++i) + { + tree expr = lower_bounds[i].expr; + tree type = unsigned_type_for (TREE_TYPE (expr)); + expr = fold_convert (type, expr); + poly_uint64 bound = lower_bounds[i].min_value; + if (!lower_bounds[i].unsigned_p) + { + expr = fold_build2 (PLUS_EXPR, type, expr, + build_int_cstu (type, bound - 1)); + bound += bound - 1; + } + tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr, + build_int_cstu (type, bound)); + chain_cond_expr (cond_expr, part_cond_expr); + } +} + /* Function vect_create_cond_for_alias_checks. 
Create a conditional expression that represents the run-time checks for @@ -2986,6 +3011,7 @@ vect_loop_versioning (loop_vec_info loop_vinfo, if (version_alias) { vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr); + vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr); vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr); } diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 7fc2215..64b9ce3 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -2475,6 +2475,7 @@ again: } } /* Free optimized alias test DDRS. */ + LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); /* Reset target cost data. */ @@ -3673,6 +3674,18 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, /* Count LEN - 1 ANDs and LEN comparisons. */ (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, NULL, 0, vect_prologue); + len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length (); + if (len) + { + /* Count LEN - 1 ANDs and LEN comparisons. */ + unsigned int nstmts = len * 2 - 1; + /* +1 for each bias that needs adding. */ + for (unsigned int i = 0; i < len; ++i) + if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p) + nstmts += 1; + (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt, + NULL, 0, vect_prologue); + } dump_printf (MSG_NOTE, "cost model: Adding cost of checks for loop " "versioning aliasing.\n"); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index d3dda52..56e875f 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -174,6 +174,18 @@ typedef struct _slp_instance { loop to be valid. */ typedef std::pair<tree, tree> vec_object_pair; +/* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE. + UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. 
*/ +struct vec_lower_bound { + vec_lower_bound () {} + vec_lower_bound (tree e, bool u, poly_uint64 m) + : expr (e), unsigned_p (u), min_value (m) {} + + tree expr; + bool unsigned_p; + poly_uint64 min_value; +}; + /* Vectorizer state common between loop and basic-block vectorization. */ struct vec_info { enum vec_kind { bb, loop }; @@ -406,6 +418,14 @@ typedef struct _loop_vec_info : public vec_info { /* Check that the addresses of each pair of objects is unequal. */ auto_vec<vec_object_pair> check_unequal_addrs; + /* List of values that are required to be nonzero. This is used to check + whether things like "x[i * n] += 1;" are safe and eventually gets added + to the checks for lower bounds below. */ + auto_vec<tree> check_nonzero; + + /* List of values that need to be checked for a minimum value. */ + auto_vec<vec_lower_bound> lower_bounds; + /* Statements in the loop that have data references that are candidates for a runtime (loop versioning) misalignment check. */ auto_vec<gimple *> may_misalign_stmts; @@ -514,6 +534,8 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs +#define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero +#define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor @@ -534,7 +556,8 @@ typedef struct _loop_vec_info : public vec_info { ((L)->may_misalign_stmts.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ ((L)->comp_alias_ddrs.length () > 0 \ - || (L)->check_unequal_addrs.length () > 0) + || (L)->check_unequal_addrs.length () > 0 \ + || (L)->lower_bounds.length () > 0) #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) #define 
LOOP_REQUIRES_VERSIONING(L) \ |