diff options
Diffstat (limited to 'gcc')
35 files changed, 509 insertions, 96 deletions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index fd1ce06..d955802 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -17285,7 +17285,7 @@ Maximum number of relations the oracle will register in a basic block. Work bound when discovering transitive relations from existing relations. @item min-pagesize -Minimum page size for warning purposes. +Minimum page size for warning and early break vectorization purposes. @item openacc-kernels Specify mode of OpenACC `kernels' constructs handling. diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c index 9ef1330..4c82558 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c @@ -55,7 +55,9 @@ int main() } } rephase (); +#pragma GCC novector for (i = 0; i < 32; ++i) +#pragma GCC novector for (j = 0; j < 3; ++j) #pragma GCC novector for (k = 0; k < 3; ++k) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c index 423ff0b..f99c57b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c @@ -5,7 +5,8 @@ /* { dg-additional-options "-O3" } */ /* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* Arm and -m32 create a group size of 3 here, which we can't support yet. AArch64 makes elementwise accesses here. 
*/ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } } } } */ typedef struct filter_list_entry { const char *name; diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c index 6d7fb92..ed6baf2d 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_128.c @@ -3,7 +3,8 @@ /* { dg-require-effective-target vect_early_break } */ /* { dg-require-effective-target vect_int } */ -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "vect" { target { ! vect_partial_vectors } } } } */ /* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ #ifndef N diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_132-pr118464.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_132-pr118464.c new file mode 100644 index 0000000..9bf0cbc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_132-pr118464.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +int a, b, c, d, e, f; +short g[1]; +int main() { + int h; + while (a) { + while (h) + ; + for (b = 2; b; b--) { + while (c) + ; + f = g[a]; + if (d) + break; + } + while (e) + ; + } + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa1.c new file mode 100644 index 0000000..dc77118 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa1.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { 
dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +char string[1020]; + +char * find(int n, char c) +{ + for (int i = 1; i < n; i++) { + if (string[i] == c) + return &string[i]; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c new file mode 100644 index 0000000..dd05046 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa10.c @@ -0,0 +1,25 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* Alignment requirement too big, load lanes targets can't safely vectorize this. */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { vect_partial_vectors || vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! { vect_partial_vectors || vect_load_lanes } } } } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" { target { ! 
{ vect_partial_vectors || vect_load_lanes } } } } } */ + +unsigned test4(char x, char *restrict vect_a, char *restrict vect_b, int n) +{ + unsigned ret = 0; + for (int i = 0; i < (n - 2); i+=2) + { + if (vect_a[i] > x || vect_a[i+2] > x) + return 1; + + vect_b[i] = x; + vect_b[i+1] = x+1; + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c new file mode 100644 index 0000000..085dd9b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa11.c @@ -0,0 +1,26 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* Gathers and scatters are not save to speculate across early breaks. */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */ + +#define N 1024 +int vect_a[N]; +int vect_b[N]; + +int test4(int x, int stride) +{ + int ret = 0; + for (int i = 0; i < (N / stride); i++) + { + vect_b[i] += x + i; + if (vect_a[i*stride] == x) + return i; + vect_a[i] += x * vect_b[i]; + + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa2.c new file mode 100644 index 0000000..7d56772 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +char string[1020]; + +char * find(int n, char c) +{ + for (int i = 0; i < n; i++) { + if (string[i] == c) + return &string[i]; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { 
scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa3.c new file mode 100644 index 0000000..374a051 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa3.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +char string[1020] __attribute__((aligned(1))); + +char * find(int n, char c) +{ + for (int i = 1; i < n; i++) { + if (string[i] == c) + return &string[i]; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of string" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa4.c new file mode 100644 index 0000000..297fb7e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa4.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ + +char string[1020] __attribute__((aligned(1))); + +char * find(int n, char c) +{ + for (int i = 0; i < n; i++) { + if (string[i] == c) + return &string[i]; + } + return 0; +} + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ +/* { dg-final { scan-tree-dump "force alignment of string" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa5.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa5.c new file mode 100644 index 0000000..ca95be4 
--- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa5.c @@ -0,0 +1,23 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +unsigned test4(char x, char *vect, int n) +{ + unsigned ret = 0; + for (int i = 0; i < n; i++) + { + if (vect[i] > x) + return 1; + + vect[i] = x; + } + return ret; +} + +/* { dg-final { scan-tree-dump "Alignment of access forced using peeling" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c new file mode 100644 index 0000000..ee123df --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa6.c @@ -0,0 +1,23 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +unsigned test4(char x, char *vect_a, char *vect_b, int n) +{ + unsigned ret = 0; + for (int i = 1; i < n; i++) + { + if (vect_a[i] > x || vect_b[i] > x) + return 1; + + vect_a[i] = x; + } + return ret; +} + +/* { dg-final { scan-tree-dump "Versioning for alignment will be applied" "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa7.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa7.c new file mode 100644 index 0000000..cf76c71 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa7.c @@ -0,0 +1,23 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* This should be vectorizable through 
load_lanes and linear targets. */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_load_lanes } } } */ + +unsigned test4(char x, char * restrict vect_a, char * restrict vect_b, int n) +{ + unsigned ret = 0; + for (int i = 0; i < n; i+=2) + { + if (vect_a[i] > x || vect_a[i+1] > x) + return 1; + + vect_b[i] = x; + vect_b[i+1] = x+1; + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c new file mode 100644 index 0000000..25d3a62 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa8.c @@ -0,0 +1,26 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */ + +char vect_a[1025]; +char vect_b[1025]; + +unsigned test4(char x, int n) +{ + unsigned ret = 0; + for (int i = 1; i < (n - 2); i+=2) + { + if (vect_a[i] > x || vect_a[i+1] > x) + return 1; + + vect_b[i] = x; + vect_b[i+1] = x+1; + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c new file mode 100644 index 0000000..10eb98b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_133_pfa9.c @@ -0,0 +1,29 @@ +/* { dg-add-options vect_early_break } */ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ + +/* { dg-additional-options "-Ofast" } */ + +/* Group size is uneven and second group is misaligned. Needs partial vectors. 
*/ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */ +/* { dg-final { scan-tree-dump-not "Alignment of access forced using peeling" "vect" } } */ + + +char vect_a[1025]; +char vect_b[1025]; + +unsigned test4(char x, int n) +{ + unsigned ret = 0; + for (int i = 1; i < (n - 2); i+=2) + { + if (vect_a[i-1] > x || vect_a[i+2] > x) + return 1; + + vect_b[i] = x; + vect_b[i+1] = x+1; + } + return ret; +} diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c index babc79c..edddb44 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_18.c @@ -5,7 +5,8 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_load_lanes } } } } */ #ifndef N #define N 803 diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_2.c index dec0b49..8f5ccc4 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_2.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_2.c @@ -5,7 +5,9 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! "arm*-*-*" } } } } */ +/* Complex numbers read x and x+1, which on non-load lanes targets require partial loops. */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { { ! "arm*-*-*" } && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { "arm*-*-*" } || { ! 
vect_load_lanes } } } } } */ #include <complex.h> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_20.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_20.c index 039aac7..7ac1e76 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_20.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_20.c @@ -5,7 +5,7 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! ia32 } } } } */ #include <stdbool.h> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c index f73f3c2..483ea5f 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_21.c @@ -5,7 +5,7 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! ia32 } } } } */ #include <stdbool.h> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c index b3f5984..f8f84fa 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_22.c @@ -42,4 +42,6 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */ +/* This will fail because we cannot SLP the load groups yet. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { ! 
vect_partial_vectors } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c index 47d2a50..643016b 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c @@ -41,4 +41,6 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" } } */ +/* This will fail because we cannot SLP the load groups yet. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 2 "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 1 "vect" { target { ! vect_partial_vectors } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_38.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_38.c index 8062fbb..36fc6a6 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_38.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_38.c @@ -23,4 +23,5 @@ unsigned test4(unsigned x) return ret; } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
\ No newline at end of file +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_load_lanes } } } */ +/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { ! vect_load_lanes } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c index 9d3c6a5..b3f40b8 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_39.c @@ -23,4 +23,5 @@ unsigned test4(unsigned x, unsigned n) return ret; } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */ +/* cannot safely vectorize this due to the group misalignment. */ +/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 0 "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c index 7e9f635..0cfa242 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_43.c @@ -27,4 +27,6 @@ unsigned test4(unsigned x) return ret; } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
\ No newline at end of file +/* This will fail because we cannot SLP the load groups yet. */ +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { ! vect_partial_vectors } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c index 7e9f635..0cfa242 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_44.c @@ -27,4 +27,6 @@ unsigned test4(unsigned x) return ret; } -/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
\ No newline at end of file +/* This will fail because we cannot SLP the load groups yet. */ +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "vectorized 1 loops in function" "vect" { target { ! vect_partial_vectors } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_53.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_53.c index a02d598..d4fd0d3 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_53.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_53.c @@ -2,6 +2,7 @@ /* { dg-do compile } */ /* { dg-require-effective-target vect_early_break } */ /* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_partial_vectors } */ void abort (); int a[64], b[64]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c index 9096f66..b35e737 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_56.c @@ -4,6 +4,7 @@ /* { dg-require-effective-target vect_int } */ /* { dg-add-options bind_pic_locally } */ /* { dg-require-effective-target vect_early_break_hw } */ +/* { dg-require-effective-target vect_partial_vectors } */ #include <stdarg.h> #include "tree-vect.h" diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c index 319bd12..a488665 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_57.c @@ -5,8 +5,9 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ +/* Multiple loads of different alignments, we can't peel this. 
*/ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */ void abort (); diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c index 7b870e9..c7cce81 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_6.c @@ -5,7 +5,9 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ +/* This will fail because we cannot SLP the load groups yet. */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target vect_partial_vectors } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_partial_vectors } } } } */ #define N 1024 unsigned vect_a[N]; diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_7.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_7.c index d218a06..34d1874 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_7.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_7.c @@ -5,7 +5,10 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { ! "arm*-*-*" } } } } */ +/* Complex numbers read x and x+1, which on non-load lanes targets require partial loops. */ +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { { ! "arm*-*-*" } && vect_load_lanes } } } } */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { { "arm*-*-*" } || { ! 
vect_load_lanes } } } } } */ + #include <complex.h> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c index 8a8c076..b58a461 100644 --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_81.c @@ -5,8 +5,9 @@ /* { dg-additional-options "-Ofast" } */ -/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ -/* { dg-final { scan-tree-dump "epilog loop required" "vect" } } */ +/* Multiple loads with different misalignments. Can't peel need partial loop support. */ +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */ +/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */ void abort (); unsigned short sa[32]; diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index eb43526..7c63916 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -731,7 +731,9 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) if (is_gimple_debug (stmt)) continue; - stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt); + stmt_vec_info stmt_vinfo + = vect_stmt_to_vectorize (loop_vinfo->lookup_stmt (stmt)); + stmt = STMT_VINFO_STMT (stmt_vinfo); auto dr_ref = STMT_VINFO_DATA_REF (stmt_vinfo); if (!dr_ref) continue; @@ -748,26 +750,16 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) bounded by VF so accesses are within range. We only need to check the reads since writes are moved to a safe place where if we get there we know they are safe to perform. 
*/ - if (DR_IS_READ (dr_ref) - && !ref_within_array_bound (stmt, DR_REF (dr_ref))) + if (DR_IS_READ (dr_ref)) { - if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo) - || STMT_VINFO_STRIDED_P (stmt_vinfo)) - { - const char *msg - = "early break not supported: cannot peel " - "for alignment, vectorization would read out of " - "bounds at %G"; - return opt_result::failure_at (stmt, msg, stmt); - } - - dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_vinfo); - dr_info->need_peeling_for_alignment = true; + dr_set_safe_speculative_read_required (stmt_vinfo, true); + bool inbounds = ref_within_array_bound (stmt, DR_REF (dr_ref)); + DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_vinfo)) = inbounds; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, - "marking DR (read) as needing peeling for " - "alignment at %G", stmt); + "marking DR (read) as possibly needing peeling " + "for alignment at %G", stmt); } if (DR_IS_READ (dr_ref)) @@ -1326,9 +1318,6 @@ vect_record_base_alignments (vec_info *vinfo) Compute the misalignment of the data reference DR_INFO when vectorizing with VECTYPE. - RESULT is non-NULL iff VINFO is a loop_vec_info. In that case, *RESULT will - be set appropriately on failure (but is otherwise left unchanged). - Output: 1. 
initialized misalignment info for DR_INFO @@ -1337,7 +1326,7 @@ vect_record_base_alignments (vec_info *vinfo) static void vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info, - tree vectype, opt_result *result = nullptr) + tree vectype) { stmt_vec_info stmt_info = dr_info->stmt; vec_base_alignments *base_alignments = &vinfo->base_alignments; @@ -1365,63 +1354,29 @@ vect_compute_data_ref_alignment (vec_info *vinfo, dr_vec_info *dr_info, = exact_div (targetm.vectorize.preferred_vector_alignment (vectype), BITS_PER_UNIT); - /* If this DR needs peeling for alignment for correctness, we must - ensure the target alignment is a constant power-of-two multiple of the - amount read per vector iteration (overriding the above hook where - necessary). */ - if (dr_info->need_peeling_for_alignment) + if (loop_vinfo + && dr_safe_speculative_read_required (stmt_info)) { - /* Vector size in bytes. */ - poly_uint64 safe_align = tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype)); - - /* We can only peel for loops, of course. */ - gcc_checking_assert (loop_vinfo); - - /* Calculate the number of vectors read per vector iteration. If - it is a power of two, multiply through to get the required - alignment in bytes. Otherwise, fail analysis since alignment - peeling wouldn't work in such a case. */ - poly_uint64 num_scalars = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + auto vectype_size + = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + poly_uint64 new_alignment = vf * vectype_size; + /* If we have a grouped access we require that the alignment be N * elem. 
*/ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) - num_scalars *= DR_GROUP_SIZE (stmt_info); + new_alignment *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); - auto num_vectors = vect_get_num_vectors (num_scalars, vectype); - if (!pow2p_hwi (num_vectors)) - { - *result = opt_result::failure_at (vect_location, - "non-power-of-two num vectors %u " - "for DR needing peeling for " - "alignment at %G", - num_vectors, stmt_info->stmt); - return; - } - - safe_align *= num_vectors; - if (maybe_gt (safe_align, 4096U)) - { - pretty_printer pp; - pp_wide_integer (&pp, safe_align); - *result = opt_result::failure_at (vect_location, - "alignment required for correctness" - " (%s) may exceed page size", - pp_formatted_text (&pp)); - return; - } - - unsigned HOST_WIDE_INT multiple; - if (!constant_multiple_p (vector_alignment, safe_align, &multiple) - || !pow2p_hwi (multiple)) + unsigned HOST_WIDE_INT target_alignment; + if (new_alignment.is_constant (&target_alignment) + && pow2p_hwi (target_alignment)) { if (dump_enabled_p ()) { dump_printf_loc (MSG_NOTE, vect_location, - "forcing alignment for DR from preferred ("); - dump_dec (MSG_NOTE, vector_alignment); - dump_printf (MSG_NOTE, ") to safe align ("); - dump_dec (MSG_NOTE, safe_align); - dump_printf (MSG_NOTE, ") for stmt: %G", stmt_info->stmt); + "alignment increased due to early break to "); + dump_dec (MSG_NOTE, new_alignment); + dump_printf (MSG_NOTE, " bytes.\n"); } - vector_alignment = safe_align; + vector_alignment = target_alignment; } } @@ -2487,6 +2442,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) || !slpeel_can_duplicate_loop_p (loop, LOOP_VINFO_IV_EXIT (loop_vinfo), loop_preheader_edge (loop)) || loop->inner + /* We don't currently maintaing the LCSSA for prologue peeled inversed + loops. 
*/ || LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) do_peeling = false; @@ -2950,12 +2907,9 @@ vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo) if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt) && DR_GROUP_FIRST_ELEMENT (dr_info->stmt) != dr_info->stmt) continue; - opt_result res = opt_result::success (); + vect_compute_data_ref_alignment (loop_vinfo, dr_info, - STMT_VINFO_VECTYPE (dr_info->stmt), - &res); - if (!res) - return res; + STMT_VINFO_VECTYPE (dr_info->stmt)); } } @@ -7226,7 +7180,7 @@ vect_supportable_dr_alignment (vec_info *vinfo, dr_vec_info *dr_info, if (misalignment == 0) return dr_aligned; - else if (dr_info->need_peeling_for_alignment) + else if (dr_safe_speculative_read_required (stmt_info)) return dr_unaligned_unsupported; /* For now assume all conditional loads/stores support unaligned diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 6bbb16b..743631f 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2597,6 +2597,128 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, return false; } + + /* Checks if all scalar iterations are known to be inbounds. */ + bool inbounds = DR_SCALAR_KNOWN_BOUNDS (STMT_VINFO_DR_INFO (stmt_info)); + + /* Check if we support the operation if early breaks are needed. Here we + must ensure that we don't access any more than the scalar code would + have. A masked operation would ensure this, so for these load types + force masking. */ + if (loop_vinfo + && dr_safe_speculative_read_required (stmt_info) + && LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + && (*memory_access_type == VMAT_GATHER_SCATTER + || *memory_access_type == VMAT_STRIDED_SLP)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "early break not supported: cannot peel for " + "alignment. 
With non-contiguous memory vectorization" + " could read out of bounds at %G ", + STMT_VINFO_STMT (stmt_info)); + if (inbounds) + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; + else + return false; + } + + /* If this DR needs alignment for correctness, we must ensure the target + alignment is a constant power-of-two multiple of the amount read per + vector iteration or force masking. */ + if (dr_safe_speculative_read_required (stmt_info) + && *alignment_support_scheme == dr_aligned) + { + /* We can only peel for loops, of course. */ + gcc_checking_assert (loop_vinfo); + + auto target_alignment + = DR_TARGET_ALIGNMENT (STMT_VINFO_DR_INFO (stmt_info)); + unsigned HOST_WIDE_INT target_align; + + bool group_aligned = false; + if (target_alignment.is_constant (&target_align) + && nunits.is_constant ()) + { + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + auto vectype_size + = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))); + poly_uint64 required_alignment = vf * vectype_size; + /* If we have a grouped access we require that the alignment be N * elem. */ + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + required_alignment *= + DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info)); + if (!multiple_p (target_alignment, required_alignment)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "desired alignment %wu not met. Instead got %wu " + "for DR alignment at %G", + required_alignment.to_constant (), + target_align, STMT_VINFO_STMT (stmt_info)); + return false; + } + + if (!pow2p_hwi (target_align)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "non-power-of-two vector alignment %wd " + "for DR alignment at %G", + target_align, STMT_VINFO_STMT (stmt_info)); + return false; + } + + /* For VLA we have to insert a runtime check that the vector loads + per iterations don't exceed a page size. For now we can use + POLY_VALUE_MAX as a proxy as we can't peel for VLA. 
*/ + if (known_gt (required_alignment, (unsigned)param_min_pagesize)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "alignment required for correctness ("); + dump_dec (MSG_MISSED_OPTIMIZATION, required_alignment); + dump_printf (MSG_NOTE, ") may exceed page size\n"); + } + return false; + } + + group_aligned = true; + } + + /* There are multiple loads that have a misalignment that we couldn't + align. We would need LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P to + vectorize. */ + if (!group_aligned) + { + if (inbounds) + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; + else + return false; + } + + /* When using a group access the first element may be aligned but the + subsequent loads may not be. For LOAD_LANES since the loads are based + on the first DR then all loads in the group are aligned. For + non-LOAD_LANES this is not the case. In particular a load + blend when + there are gaps can have the non first loads issued unaligned, even + partially overlapping the memory of the first load in order to simplify + the blend. This is what the x86_64 backend does for instance. As + such only the first load in the group is aligned, the rest are not. + Because of this the permutes may break the alignment requirements that + have been set, and as such we should for now, reject them. 
*/ +      if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) +	{ +	  if (dump_enabled_p ()) +	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +			     "loads with load permutations not supported for " +			     "speculative early break loads for %G", +			     STMT_VINFO_STMT (stmt_info)); +	  return false; +	} +    } + if (*alignment_support_scheme == dr_unaligned_unsupported) { if (dump_enabled_p ()) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index b0cb081..97caf61 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1281,7 +1281,11 @@ public: /* Set by early break vectorization when this DR needs peeling for alignment for correctness. */ - bool need_peeling_for_alignment; + bool safe_speculative_read_required; + + /* Set by early break vectorization when this DR's scalar accesses are known + to be inbounds of a known bounds loop. */ + bool scalar_access_known_in_bounds; tree base_decl; @@ -1997,6 +2001,35 @@ dr_target_alignment (dr_vec_info *dr_info) return dr_info->target_alignment; } #define DR_TARGET_ALIGNMENT(DR) dr_target_alignment (DR) +#define DR_SCALAR_KNOWN_BOUNDS(DR) (DR)->scalar_access_known_in_bounds + +/* Return if the stmt_vec_info requires peeling for alignment. */ +inline bool +dr_safe_speculative_read_required (stmt_vec_info stmt_info) +{ + dr_vec_info *dr_info; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info)); + else + dr_info = STMT_VINFO_DR_INFO (stmt_info); + + return dr_info->safe_speculative_read_required; +} + +/* Set the safe_speculative_read_required for the stmt_vec_info, if group + access then set on the first element otherwise set on DR directly. 
*/ +inline void +dr_set_safe_speculative_read_required (stmt_vec_info stmt_info, + bool requires_alignment) +{ + dr_vec_info *dr_info; + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (stmt_info)); + else + dr_info = STMT_VINFO_DR_INFO (stmt_info); + + dr_info->safe_speculative_read_required = requires_alignment; +} inline void set_dr_target_alignment (dr_vec_info *dr_info, poly_uint64 val) |