diff options
16 files changed, 264 insertions, 170 deletions
diff --git a/gcc/omp-oacc-kernels-decompose.cc b/gcc/omp-oacc-kernels-decompose.cc index cf9718f..4ca899d 100644 --- a/gcc/omp-oacc-kernels-decompose.cc +++ b/gcc/omp-oacc-kernels-decompose.cc @@ -793,7 +793,8 @@ make_data_region_try_statement (location_t loc, gimple *body) /* If INNER_BIND_VARS holds variables, build an OpenACC data region with location LOC containing BODY and having 'create (var)' clauses for each - variable. If INNER_CLEANUP is present, add a try-finally statement with + variable (as a side effect, such variables also get TREE_ADDRESSABLE set). + If INNER_CLEANUP is present, add a try-finally statement with this cleanup code in the finally block. Return the new data region, or the original BODY if no data region was needed. */ @@ -842,6 +843,9 @@ maybe_build_inner_data_region (location_t loc, gimple *body, inner_data_clauses = new_clause; prev_mapped_var = v; + + /* See <https://gcc.gnu.org/PR100280>. */ + TREE_ADDRESSABLE (v) = 1; } } diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c new file mode 100644 index 0000000..f3685f2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c @@ -0,0 +1,41 @@ +/* Check offloaded function's attributes and classification for OpenACC + kernels. */ + +/* { dg-additional-options "--param openacc-kernels=parloops" } */ + +/* { dg-additional-options "-O2" } + { dg-additional-options "-fopt-info-optimized-omp" } + { dg-additional-options "-fdump-tree-ompexp" } + { dg-additional-options "-fdump-tree-parloops1-all" } + { dg-additional-options "-fdump-tree-oaccloops" } */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + +#define N 1024 + +extern unsigned int *__restrict a; +extern unsigned int *__restrict b; +extern unsigned int *__restrict c; + +void KERNELS () +{ +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */ + for (unsigned int i = 0; i < N; i++) + c[i] = a[i] + b[i]; +} + +/* Check the offloaded function's attributes. + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */ + +/* Check that exactly one OpenACC kernels construct is analyzed, and that it + can be parallelized. + { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } + { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check the offloaded function's classification and compute dimensions (will + always be 1 x 1 x 1 for non-offloading compilation). + { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } } + { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } } + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c new file mode 100644 index 0000000..6522caf --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c @@ -0,0 +1,45 @@ +/* Check offloaded function's attributes and classification for unparallelized + OpenACC kernels. */ + +/* { dg-additional-options "--param openacc-kernels=parloops" } */ + +/* { dg-additional-options "-O2" } + { dg-additional-options "-fopt-info-optimized-omp" } + { dg-additional-options "-fdump-tree-ompexp" } + { dg-additional-options "-fdump-tree-parloops1-all" } + { dg-additional-options "-fdump-tree-oaccloops" } */ + +/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting + aspects of that functionality. */ + +#define N 1024 + +extern unsigned int *__restrict a; +extern unsigned int *__restrict b; +extern unsigned int *__restrict c; + +/* An "extern"al mapping of loop iterations/array indices makes the loop + unparallelizable. */ +extern unsigned int f (unsigned int); + +void KERNELS () +{ +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (unsigned int i = 0; i < N; i++) + c[i] = a[f (i)] + b[f (i)]; +} + +/* Check the offloaded function's attributes. + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */ + +/* Check that exactly one OpenACC kernels construct is analyzed, and that it + can't be parallelized. + { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } } + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } + { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */ + +/* Check the offloaded function's classification and compute dimensions (will + always be 1 x 1 x 1 for non-offloading compilation). + { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } } + { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } } + { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c index 1d12658..daa8fcb 100644 --- a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c +++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c @@ -1,8 +1,10 @@ /* Check offloaded function's attributes and classification for unparallelized OpenACC kernels. */ +/* { dg-additional-options "--param openacc-kernels=decompose" } */ + /* { dg-additional-options "-O2" } - { dg-additional-options "-fopt-info-optimized-omp" } + { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-fdump-tree-ompexp" } { dg-additional-options "-fdump-tree-parloops1-all" } { dg-additional-options "-fdump-tree-oaccloops" } */ @@ -23,6 +25,7 @@ extern unsigned int f (unsigned int); void KERNELS () { #pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ for (unsigned int i = 0; i < N; i++) c[i] = a[f (i)] + b[f (i)]; } diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c index bdf7b4a..b54a71e 100644 --- a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c +++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c @@ -1,8 +1,10 @@ /* Check offloaded function's attributes and classification for OpenACC kernels. */ +/* { dg-additional-options "--param openacc-kernels=decompose" } */ + /* { dg-additional-options "-O2" } - { dg-additional-options "-fopt-info-optimized-omp" } + { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-fdump-tree-ompexp" } { dg-additional-options "-fdump-tree-parloops1-all" } { dg-additional-options "-fdump-tree-oaccloops" } */ @@ -19,6 +21,7 @@ extern unsigned int *__restrict c; void KERNELS () { #pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ for (unsigned int i = 0; i < N; i++) c[i] = a[i] + b[i]; } diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c index 4dd55eb..64ce894 100644 --- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c +++ b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c @@ -55,7 +55,7 @@ main () ; } - { /*TODO Instead of using 'for (int i = 0; [...])', move 'int i' outside, to work around for ICE detailed in 'kernels-decompose-ice-1.c'. */ + { int i; #pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */ @@ -64,6 +64,20 @@ main () a[i] = 0; } +#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */ + { + int i; + } + +#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ + for (int i = 0; i < N; i++) + a[i] = 0; + #pragma acc kernels loop /* { dg-line l_loop_i[incr c_loop_i] } */ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c deleted file mode 100644 index e83b451..0000000 --- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c +++ /dev/null @@ -1,114 +0,0 @@ -/* Test OpenACC 'kernels' construct decomposition. */ - -/* { dg-additional-options "-fopt-info-omp-all" } */ - -/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */ -/* { dg-ice "TODO" } - { dg-prune-output "during GIMPLE pass: omplower" } */ - -/* { dg-additional-options "--param=openacc-privatization=noisy" } */ - -/* Reduced from 'kernels-decompose-2.c'. - (Hopefully) similar instances: - - 'kernels-decompose-ice-2.c' - - 'libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c' - - 'libgomp.oacc-c-c++-common/kernels-decompose-1.c' -*/ - -int -main () -{ -#define N 10 - -#pragma acc kernels - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-1 } */ - /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ - for (int i = 0; i < N; i++) - ; - - return 0; -} - -/* - In 'gimple' we've got: - - main () - { - int D.2087; - - { - int a[10]; - - try - { - #pragma omp target oacc_kernels map(tofrom:a [len: 40]) - { - { - int i; - - i = 0; - goto <D.2085>; - [...] - - ..., which in 'omp_oacc_kernels_decompose' we turn into: - - main () - { - int D.2087; - - { - int a[10]; - - try - { - #pragma omp target oacc_data_kernels map(tofrom:a [len: 40]) - { - try - { - { - int i; - - #pragma omp target oacc_data_kernels map(alloc:i [len: 4]) - { - try - { - { - #pragma omp target oacc_kernels async(-1) map(force_present:i [len: 4]) map(force_present:a [len: 40]) - { - i = 0; - goto <D.2085>; - [...] - - ..., which results in ICE in: - - #1 0x0000000000d2247b in lower_omp_target (gsi_p=gsi_p@entry=0x7fffffffbc90, ctx=ctx@entry=0x2c994c0) at [...]/gcc/omp-low.c:11981 - 11981 gcc_assert (offloaded); - (gdb) list - 11976 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar))); - 11977 gimplify_assign (x, var, &ilist); - 11978 } - 11979 else if (is_gimple_reg (var)) - 11980 { - 11981 gcc_assert (offloaded); - 11982 tree avar = create_tmp_var (TREE_TYPE (var)); - 11983 mark_addressable (avar); - 11984 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c); - 11985 if (GOMP_MAP_COPY_TO_P (map_kind) - (gdb) call debug_tree(var) - <var_decl 0x7ffff7feebd0 i - type <integer_type 0x7ffff67be5e8 int sizes-gimplified public SI - size <integer_cst 0x7ffff67a5f18 constant 32> - unit-size <integer_cst 0x7ffff67a5f30 constant 4> - align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff67be5e8 precision:32 min <integer_cst 0x7ffff67a5ed0 -2147483648> max <integer_cst 0x7ffff67a5ee8 2147483647> - pointer_to_this <pointer_type 0x7ffff67c69d8>> - used read SI [...]:15:12 size <integer_cst 0x7ffff67a5f18 32> unit-size <integer_cst 0x7ffff67a5f30 4> - align:32 warn_if_not_align:0 context <function_decl 0x7ffff68eea00 main>> - - Just defusing the 'assert' is not sufficient: - - libgomp: present clause: !acc_is_present (0x7ffe29cba3ec, 4 (0x4)) - - TODO Can't the 'omp_oacc_kernels_decompose' transformation be much simpler, such that we avoid the intermediate 'data' if we've got just one compute construct inside it? - TODO But it's not clear if that'd just resolve one simple instance of the general problem? - -*/ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c deleted file mode 100644 index 16af57d..0000000 --- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c +++ /dev/null @@ -1,22 +0,0 @@ -/* Test OpenACC 'kernels' construct decomposition. */ - -/* { dg-additional-options "-fopt-info-omp-all" } */ - -/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */ -/* { dg-ice "TODO" } - { dg-prune-output "during GIMPLE pass: omplower" } */ - -/* { dg-additional-options "--param=openacc-privatization=noisy" } */ - -/* Reduced from 'kernels-decompose-ice-1.c'. */ - -int -main () -{ -#pragma acc kernels - /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .-1 } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-2 } */ - { - int i; - } -} diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c new file mode 100644 index 0000000..b497af2 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c @@ -0,0 +1,19 @@ +/* Reduced from 'libgomp.oacc-c-c++-common/kernels-loop-2.c'. */ + +/* { dg-additional-options "--param openacc-kernels=decompose" } */ + +/* { dg-additional-options "-fopt-info-all-omp" } */ + +/* { dg-additional-options "--param=openacc-privatization=noisy" } */ + +void +foo (void) /* { dg-line l_f_1 } */ +{ +#pragma acc kernels /* { dg-line l_k_1 } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_k_1 } */ + /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_k_1 } */ + /* { dg-bogus {note: beginning 'parloops' part in OpenACC 'kernels' region} {TODO location} { xfail *-*-* } l_f_1 } + { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} TODO { xfail *-*-* } .+1 } */ + for (int i;;) + ; +} diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95 new file mode 100644 index 0000000..b8c2d99 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95 @@ -0,0 +1,43 @@ +! Check offloaded function's attributes and classification for OpenACC +! kernels. + +! { dg-additional-options "--param openacc-kernels=parloops" } + +! { dg-additional-options "-O2" } +! { dg-additional-options "-fopt-info-optimized-omp" } +! { dg-additional-options "-fdump-tree-ompexp" } +! { dg-additional-options "-fdump-tree-parloops1-all" } +! { dg-additional-options "-fdump-tree-oaccloops" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i + + call setup(a, b) + + !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } + do i = 0, n - 1 + c(i) = a(i) + b(i) + end do + !$acc end kernels +end program main + +! Check the offloaded function's attributes. +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } + +! Check that exactly one OpenACC kernels construct is analyzed, and that it +! can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } } +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } + +! Check the offloaded function's classification and compute dimensions (will +! always be 1 x 1 x 1 for non-offloading compilation). +! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } } +! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } } +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95 new file mode 100644 index 0000000..3773327 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95 @@ -0,0 +1,47 @@ +! Check offloaded function's attributes and classification for unparallelized +! OpenACC kernels. + +! { dg-additional-options "--param openacc-kernels=parloops" } + +! { dg-additional-options "-O2" } +! { dg-additional-options "-fopt-info-optimized-omp" } +! { dg-additional-options "-fdump-tree-ompexp" } +! { dg-additional-options "-fdump-tree-parloops1-all" } +! { dg-additional-options "-fdump-tree-oaccloops" } + +! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting +! aspects of that functionality. + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i + + ! An "external" mapping of loop iterations/array indices makes the loop + ! unparallelizable. + integer, external :: f + + call setup(a, b) + + !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do i = 0, n - 1 + c(i) = a(f (i)) + b(f (i)) + end do + !$acc end kernels +end program main + +! Check the offloaded function's attributes. +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } + +! Check that exactly one OpenACC kernels construct is analyzed, and that it +! can't be parallelized. +! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } } +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } } +! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } + +! Check the offloaded function's classification and compute dimensions (will +! always be 1 x 1 x 1 for non-offloading compilation). +! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } } +! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } } +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 index 3fb48b3..ee8e289 100644 --- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 @@ -1,8 +1,10 @@ ! Check offloaded function's attributes and classification for unparallelized ! OpenACC kernels. +! { dg-additional-options "--param openacc-kernels=decompose" } + ! { dg-additional-options "-O2" } -! { dg-additional-options "-fopt-info-optimized-omp" } +! { dg-additional-options "-fopt-info-all-omp" } ! { dg-additional-options "-fdump-tree-ompexp" } ! { dg-additional-options "-fdump-tree-parloops1-all" } ! { dg-additional-options "-fdump-tree-oaccloops" } @@ -23,6 +25,7 @@ program main call setup(a, b) !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } do i = 0, n - 1 c(i) = a(f (i)) + b(f (i)) end do diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 index 6c8d298..a4bcca0 100644 --- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 +++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 @@ -1,8 +1,10 @@ ! Check offloaded function's attributes and classification for OpenACC ! kernels. +! { dg-additional-options "--param openacc-kernels=decompose" } + ! { dg-additional-options "-O2" } -! { dg-additional-options "-fopt-info-optimized-omp" } +! { dg-additional-options "-fopt-info-all-omp" } ! { dg-additional-options "-fdump-tree-ompexp" } ! { dg-additional-options "-fdump-tree-parloops1-all" } ! { dg-additional-options "-fdump-tree-oaccloops" } @@ -19,6 +21,7 @@ program main call setup(a, b) !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } + ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } do i = 0, n - 1 c(i) = a(i) + b(i) end do diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c index a6eb82b..3e5b6ba 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c @@ -1,5 +1,5 @@ /* { dg-additional-options "--param=openacc-kernels=decompose" } */ -/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. +/* ICE similar to PR100280, but not the same. { dg-ice "TODO" } TODO { dg-prune-output "during GIMPLE pass: omplower" } TODO { dg-do link } */ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c index e4e5815..f7ccecb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c @@ -3,7 +3,7 @@ /* Based on '../libgomp.oacc-fortran/asyncwait-1.f90'. */ /* { dg-additional-options "--param=openacc-kernels=decompose" } */ -/* TODO To avoid PR100280 ICE { dg-additional-options "--param=openacc-kernels=parloops" } */ +/* { dg-xfail-run-if TODO { openacc_radeon_accel_selected } } */ /* { dg-additional-options "-fopt-info-all-omp" } { dg-additional-options "-foffload=-fopt-info-all-omp" } */ @@ -202,11 +202,12 @@ main (void) #pragma acc data copy (a[0:N]) copy (b[0:N]) { -#pragma acc kernels async /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels async #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) b[i] = a[i]; @@ -229,11 +230,12 @@ main (void) #pragma acc data copy (a[0:N]) copy (b[0:N]) { -#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels async (1) #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) b[i] = a[i]; @@ -259,24 +261,27 @@ main (void) #pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) { #pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute } { dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ for (int i = 0; i < N; ++i) b[i] = (a[i] * a[i] * a[i]) / a[i]; #pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute } { dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ for (int i = 0; i < N; ++i) c[i] = (a[i] * 4) / a[i]; -#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels async (1) #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; @@ -307,33 +312,37 @@ main (void) #pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) { #pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ + /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute } { dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */ + /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ for (int i = 0; i < N; ++i) b[i] = (a[i] * a[i] * a[i]) / a[i]; -#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels async (1) #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) c[i] = (a[i] * 4) / a[i]; -#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels async (1) #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i]; -#pragma acc kernels wait (1) async (1) /* { dg-line l_compute[incr c_compute] } */ - /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */ +#pragma acc kernels wait (1) async (1) #pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */ + /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ /* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */ + /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */ for (int i = 0; i < N; ++i) e[i] = a[i] + b[i] + c[i] + d[i]; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c index b3b4c49..57e75f6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c @@ -32,11 +32,7 @@ int main() { /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */ int c = 234; - /* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } - { dg-note {variable 'c\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */ - - /*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. */ - (volatile int *) &c; + /* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */ #pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */ /* { dg-note {parallelized loop nest in OpenACC 'kernels' region} {} { target *-*-* } l_loop_i$c_loop_i } */ |