aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/omp-oacc-kernels-decompose.cc6
-rw-r--r--gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c41
-rw-r--r--gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c45
-rw-r--r--gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c5
-rw-r--r--gcc/testsuite/c-c++-common/goacc/classify-kernels.c5
-rw-r--r--gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c16
-rw-r--r--gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c114
-rw-r--r--gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c22
-rw-r--r--gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c19
-rw-r--r--gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f9543
-rw-r--r--gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f9547
-rw-r--r--gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f955
-rw-r--r--gcc/testsuite/gfortran.dg/goacc/classify-kernels.f955
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c53
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c6
16 files changed, 264 insertions, 170 deletions
diff --git a/gcc/omp-oacc-kernels-decompose.cc b/gcc/omp-oacc-kernels-decompose.cc
index cf9718f..4ca899d 100644
--- a/gcc/omp-oacc-kernels-decompose.cc
+++ b/gcc/omp-oacc-kernels-decompose.cc
@@ -793,7 +793,8 @@ make_data_region_try_statement (location_t loc, gimple *body)
/* If INNER_BIND_VARS holds variables, build an OpenACC data region with
location LOC containing BODY and having 'create (var)' clauses for each
- variable. If INNER_CLEANUP is present, add a try-finally statement with
+ variable (as a side effect, such variables also get TREE_ADDRESSABLE set).
+ If INNER_CLEANUP is present, add a try-finally statement with
this cleanup code in the finally block. Return the new data region, or
the original BODY if no data region was needed. */
@@ -842,6 +843,9 @@ maybe_build_inner_data_region (location_t loc, gimple *body,
inner_data_clauses = new_clause;
prev_mapped_var = v;
+
+ /* See <https://gcc.gnu.org/PR100280>. */
+ TREE_ADDRESSABLE (v) = 1;
}
}
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
new file mode 100644
index 0000000..f3685f2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-parloops.c
@@ -0,0 +1,41 @@
+/* Check offloaded function's attributes and classification for OpenACC
+ kernels. */
+
+/* { dg-additional-options "--param openacc-kernels=parloops" } */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fopt-info-optimized-omp" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-parloops1-all" }
+ { dg-additional-options "-fdump-tree-oaccloops" } */
+
+/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
+ aspects of that functionality. */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[i] + b[i];
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+ can be parallelized.
+ { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
new file mode 100644
index 0000000..6522caf
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized-parloops.c
@@ -0,0 +1,45 @@
+/* Check offloaded function's attributes and classification for unparallelized
+ OpenACC kernels. */
+
+/* { dg-additional-options "--param openacc-kernels=parloops" } */
+
+/* { dg-additional-options "-O2" }
+ { dg-additional-options "-fopt-info-optimized-omp" }
+ { dg-additional-options "-fdump-tree-ompexp" }
+ { dg-additional-options "-fdump-tree-parloops1-all" }
+ { dg-additional-options "-fdump-tree-oaccloops" } */
+
+/* { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
+ aspects of that functionality. */
+
+#define N 1024
+
+extern unsigned int *__restrict a;
+extern unsigned int *__restrict b;
+extern unsigned int *__restrict c;
+
+/* An "extern"al mapping of loop iterations/array indices makes the loop
+ unparallelizable. */
+extern unsigned int f (unsigned int);
+
+void KERNELS ()
+{
+#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
+ for (unsigned int i = 0; i < N; i++)
+ c[i] = a[f (i)] + b[f (i)];
+}
+
+/* Check the offloaded function's attributes.
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } } */
+
+/* Check that exactly one OpenACC kernels construct is analyzed, and that it
+ can't be parallelized.
+ { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+ { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } } */
+
+/* Check the offloaded function's classification and compute dimensions (will
+ always be 1 x 1 x 1 for non-offloading compilation).
+ { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
+ { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
+ { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
index 1d12658..daa8fcb 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels-unparallelized.c
@@ -1,8 +1,10 @@
/* Check offloaded function's attributes and classification for unparallelized
OpenACC kernels. */
+/* { dg-additional-options "--param openacc-kernels=decompose" } */
+
/* { dg-additional-options "-O2" }
- { dg-additional-options "-fopt-info-optimized-omp" }
+ { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
@@ -23,6 +25,7 @@ extern unsigned int f (unsigned int);
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[f (i)] + b[f (i)];
}
diff --git a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
index bdf7b4a..b54a71e 100644
--- a/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
+++ b/gcc/testsuite/c-c++-common/goacc/classify-kernels.c
@@ -1,8 +1,10 @@
/* Check offloaded function's attributes and classification for OpenACC
kernels. */
+/* { dg-additional-options "--param openacc-kernels=decompose" } */
+
/* { dg-additional-options "-O2" }
- { dg-additional-options "-fopt-info-optimized-omp" }
+ { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-fdump-tree-ompexp" }
{ dg-additional-options "-fdump-tree-parloops1-all" }
{ dg-additional-options "-fdump-tree-oaccloops" } */
@@ -19,6 +21,7 @@ extern unsigned int *__restrict c;
void KERNELS ()
{
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (unsigned int i = 0; i < N; i++)
c[i] = a[i] + b[i];
}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c
index 4dd55eb..64ce894 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-2.c
@@ -55,7 +55,7 @@ main ()
;
}
- { /*TODO Instead of using 'for (int i = 0; [...])', move 'int i' outside, to work around for ICE detailed in 'kernels-decompose-ice-1.c'. */
+ {
int i;
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
/* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */
@@ -64,6 +64,20 @@ main ()
a[i] = 0;
}
+#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
+ /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } l_compute$c_compute } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
+ {
+ int i;
+ }
+
+#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_compute$c_compute } */
+ /* { dg-optimized {assigned OpenACC gang loop parallelism} {} { target *-*-* } l_compute$c_compute } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
+ for (int i = 0; i < N; i++)
+ a[i] = 0;
+
#pragma acc kernels loop /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } l_loop_i$c_loop_i } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c
deleted file mode 100644
index e83b451..0000000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c
+++ /dev/null
@@ -1,114 +0,0 @@
-/* Test OpenACC 'kernels' construct decomposition. */
-
-/* { dg-additional-options "-fopt-info-omp-all" } */
-
-/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */
-/* { dg-ice "TODO" }
- { dg-prune-output "during GIMPLE pass: omplower" } */
-
-/* { dg-additional-options "--param=openacc-privatization=noisy" } */
-
-/* Reduced from 'kernels-decompose-2.c'.
- (Hopefully) similar instances:
- - 'kernels-decompose-ice-2.c'
- - 'libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c'
- - 'libgomp.oacc-c-c++-common/kernels-decompose-1.c'
-*/
-
-int
-main ()
-{
-#define N 10
-
-#pragma acc kernels
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-1 } */
- /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
- for (int i = 0; i < N; i++)
- ;
-
- return 0;
-}
-
-/*
- In 'gimple' we've got:
-
- main ()
- {
- int D.2087;
-
- {
- int a[10];
-
- try
- {
- #pragma omp target oacc_kernels map(tofrom:a [len: 40])
- {
- {
- int i;
-
- i = 0;
- goto <D.2085>;
- [...]
-
- ..., which in 'omp_oacc_kernels_decompose' we turn into:
-
- main ()
- {
- int D.2087;
-
- {
- int a[10];
-
- try
- {
- #pragma omp target oacc_data_kernels map(tofrom:a [len: 40])
- {
- try
- {
- {
- int i;
-
- #pragma omp target oacc_data_kernels map(alloc:i [len: 4])
- {
- try
- {
- {
- #pragma omp target oacc_kernels async(-1) map(force_present:i [len: 4]) map(force_present:a [len: 40])
- {
- i = 0;
- goto <D.2085>;
- [...]
-
- ..., which results in ICE in:
-
- #1 0x0000000000d2247b in lower_omp_target (gsi_p=gsi_p@entry=0x7fffffffbc90, ctx=ctx@entry=0x2c994c0) at [...]/gcc/omp-low.c:11981
- 11981 gcc_assert (offloaded);
- (gdb) list
- 11976 talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar)));
- 11977 gimplify_assign (x, var, &ilist);
- 11978 }
- 11979 else if (is_gimple_reg (var))
- 11980 {
- 11981 gcc_assert (offloaded);
- 11982 tree avar = create_tmp_var (TREE_TYPE (var));
- 11983 mark_addressable (avar);
- 11984 enum gomp_map_kind map_kind = OMP_CLAUSE_MAP_KIND (c);
- 11985 if (GOMP_MAP_COPY_TO_P (map_kind)
- (gdb) call debug_tree(var)
- <var_decl 0x7ffff7feebd0 i
- type <integer_type 0x7ffff67be5e8 int sizes-gimplified public SI
- size <integer_cst 0x7ffff67a5f18 constant 32>
- unit-size <integer_cst 0x7ffff67a5f30 constant 4>
- align:32 warn_if_not_align:0 symtab:0 alias-set -1 canonical-type 0x7ffff67be5e8 precision:32 min <integer_cst 0x7ffff67a5ed0 -2147483648> max <integer_cst 0x7ffff67a5ee8 2147483647>
- pointer_to_this <pointer_type 0x7ffff67c69d8>>
- used read SI [...]:15:12 size <integer_cst 0x7ffff67a5f18 32> unit-size <integer_cst 0x7ffff67a5f30 4>
- align:32 warn_if_not_align:0 context <function_decl 0x7ffff68eea00 main>>
-
- Just defusing the 'assert' is not sufficient:
-
- libgomp: present clause: !acc_is_present (0x7ffe29cba3ec, 4 (0x4))
-
- TODO Can't the 'omp_oacc_kernels_decompose' transformation be much simpler, such that we avoid the intermediate 'data' if we've got just one compute construct inside it?
- TODO But it's not clear if that'd just resolve one simple instance of the general problem?
-
-*/
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c
deleted file mode 100644
index 16af57d..0000000
--- a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-2.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Test OpenACC 'kernels' construct decomposition. */
-
-/* { dg-additional-options "-fopt-info-omp-all" } */
-
-/* { dg-additional-options "-fchecking --param=openacc-kernels=decompose" } */
-/* { dg-ice "TODO" }
- { dg-prune-output "during GIMPLE pass: omplower" } */
-
-/* { dg-additional-options "--param=openacc-privatization=noisy" } */
-
-/* Reduced from 'kernels-decompose-ice-1.c'. */
-
-int
-main ()
-{
-#pragma acc kernels
- /* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .-1 } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} {} { target *-*-* } .-2 } */
- {
- int i;
- }
-}
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c
new file mode 100644
index 0000000..b497af2
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-decompose-pr100280-1.c
@@ -0,0 +1,19 @@
+/* Reduced from 'libgomp.oacc-c-c++-common/kernels-loop-2.c'. */
+
+/* { dg-additional-options "--param openacc-kernels=decompose" } */
+
+/* { dg-additional-options "-fopt-info-all-omp" } */
+
+/* { dg-additional-options "--param=openacc-privatization=noisy" } */
+
+void
+foo (void) /* { dg-line l_f_1 } */
+{
+#pragma acc kernels /* { dg-line l_k_1 } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} {} { target *-*-* } l_k_1 } */
+ /* { dg-optimized {assigned OpenACC seq loop parallelism} {} { target *-*-* } l_k_1 } */
+ /* { dg-bogus {note: beginning 'parloops' part in OpenACC 'kernels' region} {TODO location} { xfail *-*-* } l_f_1 }
+ { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} TODO { xfail *-*-* } .+1 } */
+ for (int i;;)
+ ;
+}
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
new file mode 100644
index 0000000..b8c2d99
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-parloops.f95
@@ -0,0 +1,43 @@
+! Check offloaded function's attributes and classification for OpenACC
+! kernels.
+
+! { dg-additional-options "--param openacc-kernels=parloops" }
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fopt-info-optimized-omp" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccloops" }
+
+! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
+! aspects of that functionality.
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i
+
+ call setup(a, b)
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
+ do i = 0, n - 1
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can be parallelized.
+! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is parallelized OpenACC kernels offload" 1 "oaccloops" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
new file mode 100644
index 0000000..3773327
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized-parloops.f95
@@ -0,0 +1,47 @@
+! Check offloaded function's attributes and classification for unparallelized
+! OpenACC kernels.
+
+! { dg-additional-options "--param openacc-kernels=parloops" }
+
+! { dg-additional-options "-O2" }
+! { dg-additional-options "-fopt-info-optimized-omp" }
+! { dg-additional-options "-fdump-tree-ompexp" }
+! { dg-additional-options "-fdump-tree-parloops1-all" }
+! { dg-additional-options "-fdump-tree-oaccloops" }
+
+! { dg-additional-options "-Wopenacc-parallelism" } for testing/documenting
+! aspects of that functionality.
+
+program main
+ implicit none
+ integer, parameter :: n = 1024
+ integer, dimension (0:n-1) :: a, b, c
+ integer :: i
+
+ ! An "external" mapping of loop iterations/array indices makes the loop
+ ! unparallelizable.
+ integer, external :: f
+
+ call setup(a, b)
+
+ !$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
+ do i = 0, n - 1
+ c(i) = a(f (i)) + b(f (i))
+ end do
+ !$acc end kernels
+end program main
+
+! Check the offloaded function's attributes.
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels, omp target entrypoint\\)\\)" 1 "ompexp" } }
+
+! Check that exactly one OpenACC kernels construct is analyzed, and that it
+! can't be parallelized.
+! { dg-final { scan-tree-dump-times "FAILED:" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 1 "parloops1" } }
+! { dg-final { scan-tree-dump-not "SUCCESS: may be parallelized" "parloops1" } }
+
+! Check the offloaded function's classification and compute dimensions (will
+! always be 1 x 1 x 1 for non-offloading compilation).
+! { dg-final { scan-tree-dump-times "(?n)Function is unparallelized OpenACC kernels offload" 1 "oaccloops" } }
+! { dg-final { scan-tree-dump-times "(?n)Compute dimensions \\\[1, 1, 1\\\]" 1 "oaccloops" } }
+! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc function \\(1, 1, 1\\), oacc kernels, omp target entrypoint\\)\\)" 1 "oaccloops" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
index 3fb48b3..ee8e289 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels-unparallelized.f95
@@ -1,8 +1,10 @@
! Check offloaded function's attributes and classification for unparallelized
! OpenACC kernels.
+! { dg-additional-options "--param openacc-kernels=decompose" }
+
! { dg-additional-options "-O2" }
-! { dg-additional-options "-fopt-info-optimized-omp" }
+! { dg-additional-options "-fopt-info-all-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
@@ -23,6 +25,7 @@ program main
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC seq loop parallelism" }
+ ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
do i = 0, n - 1
c(i) = a(f (i)) + b(f (i))
end do
diff --git a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95 b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
index 6c8d298..a4bcca0 100644
--- a/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/classify-kernels.f95
@@ -1,8 +1,10 @@
! Check offloaded function's attributes and classification for OpenACC
! kernels.
+! { dg-additional-options "--param openacc-kernels=decompose" }
+
! { dg-additional-options "-O2" }
-! { dg-additional-options "-fopt-info-optimized-omp" }
+! { dg-additional-options "-fopt-info-all-omp" }
! { dg-additional-options "-fdump-tree-ompexp" }
! { dg-additional-options "-fdump-tree-parloops1-all" }
! { dg-additional-options "-fdump-tree-oaccloops" }
@@ -19,6 +21,7 @@ program main
call setup(a, b)
!$acc kernels copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) ! { dg-message "optimized: assigned OpenACC gang loop parallelism" }
+ ! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 }
do i = 0, n - 1
c(i) = a(i) + b(i)
end do
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c
index a6eb82b..3e5b6ba 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-vla-kernels-decompose-ice-1.c
@@ -1,5 +1,5 @@
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
-/* Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'.
+/* ICE similar to PR100280, but not the same.
{ dg-ice "TODO" }
TODO { dg-prune-output "during GIMPLE pass: omplower" }
TODO { dg-do link } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c
index e4e5815..f7ccecb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/f-asyncwait-1.c
@@ -3,7 +3,7 @@
/* Based on '../libgomp.oacc-fortran/asyncwait-1.f90'. */
/* { dg-additional-options "--param=openacc-kernels=decompose" } */
-/* TODO To avoid PR100280 ICE { dg-additional-options "--param=openacc-kernels=parloops" } */
+/* { dg-xfail-run-if TODO { openacc_radeon_accel_selected } } */
/* { dg-additional-options "-fopt-info-all-omp" }
{ dg-additional-options "-foffload=-fopt-info-all-omp" } */
@@ -202,11 +202,12 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N])
{
-#pragma acc kernels async /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels async
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
b[i] = a[i];
@@ -229,11 +230,12 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N])
{
-#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
b[i] = a[i];
@@ -259,24 +261,27 @@ main (void)
#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N])
{
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
b[i] = (a[i] * a[i] * a[i]) / a[i];
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
c[i] = (a[i] * 4) / a[i];
-#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
@@ -307,33 +312,37 @@ main (void)
#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N])
{
#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
+ /* { dg-note {variable 'i' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
/* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target { ! __OPTIMIZE__ } } l_compute$c_compute }
{ dg-optimized "assigned OpenACC gang loop parallelism" "" { target { __OPTIMIZE__ } } l_compute$c_compute } */
+ /* { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
for (int i = 0; i < N; ++i)
b[i] = (a[i] * a[i] * a[i]) / a[i];
-#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
c[i] = (a[i] * 4) / a[i];
-#pragma acc kernels async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
d[i] = ((a[i] * a[i] + a[i]) / a[i]) - a[i];
-#pragma acc kernels wait (1) async (1) /* { dg-line l_compute[incr c_compute] } */
- /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
- /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_compute$c_compute } */
+#pragma acc kernels wait (1) async (1)
#pragma acc loop /* { dg-line l_loop_i[incr c_loop_i] } */
+ /* { dg-note {forwarded loop nest in OpenACC 'kernels' region to 'parloops' for analysis} {} { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-note {variable 'i' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
/* { dg-note {variable 'i' in 'private' clause isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_loop_i$c_loop_i } */
+ /* { dg-optimized "assigned OpenACC seq loop parallelism" "" { target *-*-* } l_loop_i$c_loop_i } */
for (int i = 0; i < N; ++i)
e[i] = a[i] + b[i] + c[i] + d[i];
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c
index b3b4c49..57e75f6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-decompose-1.c
@@ -32,11 +32,7 @@ int main()
{
/* { dg-note {beginning 'gang-single' part in OpenACC 'kernels' region} {} { target *-*-* } .+1 } */
int c = 234;
- /* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute }
- { dg-note {variable 'c\.[0-9]+' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } l_compute$c_compute } */
-
- /*TODO Hopefully, this is the same issue as '../../../gcc/testsuite/c-c++-common/goacc/kernels-decompose-ice-1.c'. */
- (volatile int *) &c;
+ /* { dg-note {variable 'c' declared in block is candidate for adjusting OpenACC privatization level} "" { target *-*-* } l_compute$c_compute } */
#pragma acc loop independent gang /* { dg-line l_loop_i[incr c_loop_i] } */
/* { dg-note {parallelized loop nest in OpenACC 'kernels' region} {} { target *-*-* } l_loop_i$c_loop_i } */