about | summary | refs | log | tree | commit | diff
path: root/libgomp/testsuite
diff options
context:
space:
mode:
author Thomas Schwinge <thomas@codesourcery.com> 2016-03-30 17:08:47 +0200
committer Thomas Schwinge <tschwinge@gcc.gnu.org> 2016-03-30 17:08:47 +0200
commit 2620c80db02d5e32ffb5b54b80be67fcc7843d20 (patch)
tree 644cb73427c8a0d16b846a71d030b324e252a3af /libgomp/testsuite
parent ba9c755f257ad36eaa7335c48008c76c27c0b30c (diff)
download gcc-2620c80db02d5e32ffb5b54b80be67fcc7843d20.zip
gcc-2620c80db02d5e32ffb5b54b80be67fcc7843d20.tar.gz
gcc-2620c80db02d5e32ffb5b54b80be67fcc7843d20.tar.bz2
Update OpenACC test cases
gcc/testsuite/
	* c-c++-common/goacc/combined-directives.c: Clean up dg-* directives.
	* c-c++-common/goacc/loop-clauses.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.
	* gfortran.dg/goacc/loop-1.f95: Likewise.
	* gfortran.dg/goacc/loop-5.f95: Likewise.
	* gfortran.dg/goacc/loop-6.f95: Likewise.
	* gfortran.dg/goacc/loop-tree-1.f90: Likewise.
	* c-c++-common/goacc-gomp/nesting-1.c: Update.
	* c-c++-common/goacc-gomp/nesting-fail-1.c: Likewise.
	* c-c++-common/goacc/clauses-fail.c: Likewise.
	* c-c++-common/goacc/parallel-1.c: Likewise.
	* c-c++-common/goacc/reduction-1.c: Likewise.
	* c-c++-common/goacc/reduction-2.c: Likewise.
	* c-c++-common/goacc/reduction-3.c: Likewise.
	* c-c++-common/goacc/reduction-4.c: Likewise.
	* c-c++-common/goacc/routine-3.c: Likewise.
	* c-c++-common/goacc/routine-4.c: Likewise.
	* c-c++-common/goacc/routine-5.c: Likewise.
	* c-c++-common/goacc/tile.c: Likewise.
	* g++.dg/goacc/template.C: Likewise.
	* gfortran.dg/goacc/combined-directives.f90: Likewise.
	* c-c++-common/goacc/nesting-1.c: Move dg-error test cases into...
	* c-c++-common/goacc/nesting-fail-1.c: ... this file.  Update.
	* c-c++-common/goacc/kernels-1.c: Update.  Incorporate...
	* c-c++-common/goacc/kernels-empty.c: ... this file, and...
	* c-c++-common/goacc/kernels-eternal.c: ... this file, and...
	* c-c++-common/goacc/kernels-noreturn.c: ... this file.
	* c-c++-common/goacc/host_data-1.c: New file.  Incorporate...
	* c-c++-common/goacc/use_device-1.c: ... this file.
	* c-c++-common/goacc/host_data-2.c: New file.  Incorporate...
	* c-c++-common/goacc/host_data-5.c: ... this file, and...
	* c-c++-common/goacc/host_data-6.c: ... this file.
	* c-c++-common/goacc/loop-2-kernels.c: New file.
	* c-c++-common/goacc/loop-2-parallel.c: Likewise.
	* c-c++-common/goacc/loop-3.c: Likewise.
	* g++.dg/goacc/reference.C: Likewise.
	* g++.dg/goacc/routine-1.C: Likewise.
	* g++.dg/goacc/routine-2.C: Likewise.
libgomp/
	* testsuite/libgomp.oacc-c-c++-common/clauses-1.c: Update.
	* testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/if-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/vector-loop.c: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-2.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/asyncwait-3.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/declare-1.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c: Likewise.  XFAIL.
	* testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c: Update.  Incorporate...
	* testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c: ... this file.
	* testsuite/libgomp.oacc-c++/template-reduction.C: New file.
	* testsuite/libgomp.oacc-c-c++-common/gang-static-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/private-variables.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/reduction-7.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-4.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Likewise.
	* testsuite/libgomp.oacc-fortran/clauses-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/default-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/firstprivate-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/gang-static-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/if-1.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/pr68813.f90: Likewise.
	* testsuite/libgomp.oacc-fortran/private-variables.f90: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-1.c: Merge this file...
	* testsuite/libgomp.oacc-c-c++-common/parallel-1.c: ..., and this file into...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses.h: ... this new file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c: New file.
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/kernels-2.c: Rename to...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c: ... this new file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/parallel-2.c: Rename to...
	* testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c: ... this new file.  Update.
	* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: New file.  Incorporate...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c: ... this file, and...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-4.c: ... this file, and...
	* testsuite/libgomp.oacc-c-c++-common/worker-single-6.c: ... this file.
	* testsuite/libgomp.oacc-c-c++-common/update-1-2.c: Remove file.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
Co-Authored-By: Chung-Lin Tang <cltang@codesourcery.com>
Co-Authored-By: James Norris <jnorris@codesourcery.com>
Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Nathan Sidwell <nathan@codesourcery.com>
Co-Authored-By: Tom de Vries <tom@codesourcery.com>

From-SVN: r234575
Diffstat (limited to 'libgomp/testsuite')
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c++/template-reduction.C | 98
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c | 434
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c | 26
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c (renamed from libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-2.c) | 2
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c | 2
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c (renamed from libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-2.c) | 2
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c | 2
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h (renamed from libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c) | 92
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c | 23
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c | 114
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c | 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c | 48
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c | 100
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c | 354
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c | 184
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c | 62
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c | 895
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c | 953
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c | 129
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c | 88
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c | 123
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c | 76
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c | 361
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/vector-loop.c | 2
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c | 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c | 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c | 46
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 | 122
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 | 29
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 | 31
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90 | 290
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 | 41
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/default-1.f90 | 54
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90 | 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90 | 79
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 | 886
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90 | 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90 | 19
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 | 544
39 files changed, 5709 insertions, 773 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C b/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
new file mode 100644
index 0000000..fb5924c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/template-reduction.C
@@ -0,0 +1,98 @@
+const int n = 100;
+
+// Check explicit template copy map
+
+template<typename T> T
+sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n])
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check implicit template copy map
+
+template<typename T> T
+sum ()
+{
+ T s = 0;
+ T array[n];
+
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+ return s;
+}
+
+// Check present and async
+
+template<typename T> T
+async_sum (T array[])
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n])
+ for (int i = 0; i < n; i++)
+ array[i] = i+1;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += array[i];
+
+#pragma acc wait
+
+ return s;
+}
+
+// Check present and async and an explicit firstprivate
+
+template<typename T> T
+async_sum (int c)
+{
+ T s = 0;
+
+#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1)
+ for (int i = 0; i < n; i++)
+ s += i+c;
+
+#pragma acc wait
+
+ return s;
+}
+
+int
+main()
+{
+ int a[n];
+ int result = 0;
+
+ for (int i = 0; i < n; i++)
+ {
+ a[i] = i+1;
+ result += i+1;
+ }
+
+ if (sum (a) != result)
+ __builtin_abort ();
+
+ if (sum<int> () != result)
+ __builtin_abort ();
+
+#pragma acc enter data copyin (a)
+ if (async_sum (a) != result)
+ __builtin_abort ();
+
+ if (async_sum<int> (1) != result)
+ __builtin_abort ();
+#pragma acc exit data delete (a)
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
index 22cef6d..f3b490a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/asyncwait-1.c
@@ -1,4 +1,6 @@
/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* <http://news.gmane.org/find-root.php?message_id=%3C87pp0aaksc.fsf%40kepler.schwinge.homeip.net%3E>.
+ { dg-xfail-run-if "TODO" { *-*-* } } */
/* { dg-additional-options "-lcuda" } */
#include <openacc.h>
@@ -460,6 +462,438 @@ main (int argc, char **argv)
abort ();
}
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 3.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 2.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 2.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc kernels wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 2.0)
+ abort ();
+
+ if (b[i] != 4.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 11.0)
+ abort ();
+ }
+
+
+ r = cuStreamCreate (&stream1, CU_STREAM_NON_BLOCKING);
+ if (r != CUDA_SUCCESS)
+ {
+ fprintf (stderr, "cuStreamCreate failed: %d\n", r);
+ abort ();
+ }
+
+ acc_set_cuda_stream (1, stream1);
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 5.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 7.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N]) copy (b[0:N]) copy (c[0:N]) copy (d[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 7.0)
+ abort ();
+
+ if (b[i] != 49.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copy (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ d[ii] = ((a[ii] * a[ii] + a[ii]) / a[ii]) - a[ii];
+ }
+
+#pragma acc kernels wait (1) async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ e[ii] = a[ii] + b[ii] + c[ii] + d[ii];
+ }
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 3.0)
+ abort ();
+
+ if (b[i] != 9.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+
+ if (d[i] != 1.0)
+ abort ();
+
+ if (e[i] != 17.0)
+ abort ();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 4.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 4.0)
+ abort ();
+
+ if (b[i] != 16.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 5.0;
+ b[i] = 0.0;
+ c[i] = 0.0;
+ d[i] = 0.0;
+ e[i] = 0.0;
+ }
+
+#pragma acc data copyin (a[0:N], b[0:N], c[0:N]) copyin (N)
+ {
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = (a[ii] * a[ii] * a[ii]) / a[ii];
+ }
+
+#pragma acc kernels async (1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ c[ii] = (a[ii] + a[ii] + a[ii] + a[ii]) / a[ii];
+ }
+
+#pragma acc update host (a[0:N], b[0:N], c[0:N]) async (1)
+
+#pragma acc wait (1)
+
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != 5.0)
+ abort ();
+
+ if (b[i] != 25.0)
+ abort ();
+
+ if (c[i] != 4.0)
+ abort ();
+ }
+
acc_shutdown (acc_device_nvidia);
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
index 51c0cf5..410c46c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/clauses-1.c
@@ -586,6 +586,32 @@ main (int argc, char **argv)
for (i = 0; i < N; i++)
{
+ a[i] = 6.0;
+ b[i] = 0.0;
+ }
+
+#pragma acc parallel pcopy (a[0:N], b[0:N])
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ b[ii] = a[ii];
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 6.0)
+ abort ();
+ }
+
+ if (acc_is_present (&a[0], (N * sizeof (float))))
+ abort ();
+
+ if (acc_is_present (&b[0], (N * sizeof (float))))
+ abort ();
+
+ for (i = 0; i < N; i++)
+ {
a[i] = 5.0;
b[i] = 7.0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c
index d9fff6f..2cd98bd 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels-ipa-pta.c
@@ -1,4 +1,4 @@
/* { dg-do run { target lto } } */
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
-#include "parallel-1.c"
+#include "data-clauses-kernels.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c
new file mode 100644
index 0000000..f7f2d1c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-kernels.c
@@ -0,0 +1,2 @@
+#define CONSTRUCT kernels
+#include "data-clauses.h"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c
index f76c926..ddcf4e3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel-ipa-pta.c
@@ -1,4 +1,4 @@
/* { dg-do run { target lto } } */
/* { dg-additional-options "-fipa-pta -flto -flto-partition=max" } */
-#include "kernels-1.c"
+#include "data-clauses-parallel.c"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c
new file mode 100644
index 0000000..e734b2f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses-parallel.c
@@ -0,0 +1,2 @@
+#define CONSTRUCT parallel
+#include "data-clauses.h"
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h
index fd9df33..d557bef 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data-clauses.h
@@ -1,7 +1,3 @@
-/* { dg-do run } */
-
-#include <stdlib.h>
-
int i;
int main(void)
@@ -11,145 +7,145 @@ int main(void)
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copyin (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyin (i, j)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copyout (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) copy (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) copy (i, j)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) create (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1)
- abort ();
+ __builtin_abort ();
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_copy (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_copy (i, j)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
i = -1;
j = -2;
v = 0;
-#pragma acc parallel /* copyout */ present_or_copyout (v) present_or_create (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present_or_create (i, j)
{
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
if (v != 1)
- abort ();
+ __builtin_abort ();
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
i = -1;
@@ -158,23 +154,23 @@ int main(void)
#pragma acc data copyin (i, j)
{
-#pragma acc parallel /* copyout */ present_or_copyout (v) present (i, j)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v) present (i, j)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
i = -1;
@@ -183,23 +179,23 @@ int main(void)
#pragma acc data copyin(i, j)
{
-#pragma acc parallel /* copyout */ present_or_copyout (v)
+#pragma acc CONSTRUCT /* copyout */ present_or_copyout (v)
{
if (i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
i = 2;
j = 1;
if (i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
v = 1;
}
}
#if ACC_MEM_SHARED
if (v != 1 || i != 2 || j != 1)
- abort ();
+ __builtin_abort ();
#else
if (v != 1 || i != -1 || j != -2)
- abort ();
+ __builtin_abort ();
#endif
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c
index e271a37..8247e7b 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deviceptr-1.c
@@ -1,5 +1,3 @@
-/* { dg-do run } */
-
#include <stdlib.h>
int main (void)
@@ -28,5 +26,26 @@ int main (void)
abort ();
#endif
+ a_1 = a_2 = 0;
+
+#pragma acc data deviceptr (a)
+#pragma acc parallel copyout (a_1, a_2)
+ {
+ a_1 = a;
+ a_2 = &a;
+ }
+
+ if (a != A)
+ abort ();
+ if (a_1 != a)
+ abort ();
+#if ACC_MEM_SHARED
+ if (a_2 != &a)
+ abort ();
+#else
+ if (a_2 == &a)
+ abort ();
+#endif
+
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c
index 7f5d3d3..689a443 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-1.c
@@ -1,8 +1,7 @@
-/* { dg-do run } */
-
#include <openacc.h>
-int main ()
+
+void t1 ()
{
int ok = 1;
int val = 2;
@@ -28,14 +27,115 @@ int main ()
if (ondev)
{
if (!ok)
- return 1;
+ __builtin_abort ();
if (val != 2)
- return 1;
+ __builtin_abort ();
for (int i = 0; i < 32; i++)
if (ary[i] != 2 + i)
- return 1;
+ __builtin_abort ();
}
-
+}
+
+
+void t2 ()
+{
+ int ok = 1;
+ int val = 2;
+
+#pragma acc data copy(val)
+ {
+#pragma acc parallel present (val)
+ {
+ val = 7;
+ }
+
+#pragma acc parallel firstprivate (val) copy(ok)
+ {
+ ok = val == 7;
+ val = 9;
+ }
+ }
+
+ if (!ok)
+ __builtin_abort ();
+ if (val != 7)
+ __builtin_abort ();
+}
+
+
+#define N 100
+void t3 ()
+{
+ int a, b[N], c, d, i;
+ int n = acc_get_device_type () == acc_device_nvidia ? N : 1;
+
+ a = 5;
+ for (i = 0; i < n; i++)
+ b[i] = -1;
+
+ #pragma acc parallel num_gangs (n) firstprivate (a)
+ #pragma acc loop gang
+ for (i = 0; i < n; i++)
+ {
+ a = a + i;
+ b[i] = a;
+ }
+
+ for (i = 0; i < n; i++)
+ if (a + i != b[i])
+ __builtin_abort ();
+
+ #pragma acc data copy (a)
+ {
+ #pragma acc parallel firstprivate (a) copyout (c)
+ {
+ a = 10;
+ c = a;
+ }
+
+ /* This version of 'a' should still be 5. */
+ #pragma acc parallel copyout (d) present (a)
+ {
+ d = a;
+ }
+ }
+
+ if (c != 10)
+ __builtin_abort ();
+ if (d != 5)
+ __builtin_abort ();
+}
+#undef N
+
+
+void t4 ()
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 3;
+
+#pragma acc parallel firstprivate(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+#pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ arr[i] += x;
+ }
+
+ for (i = 0; i < 32; i++)
+ if (arr[i] != 8)
+ __builtin_abort ();
+}
+
+
+int
+main()
+{
+ t1 ();
+ t2 ();
+ t3 ();
+ t4 ();
+
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c
deleted file mode 100644
index 9666542..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/firstprivate-2.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* { dg-do run } */
-
-#include <openacc.h>
-
-int main ()
-{
- int ok = 1;
- int val = 2;
-
-#pragma acc data copy(val)
- {
-#pragma acc parallel present (val)
- {
- val = 7;
- }
-
-#pragma acc parallel firstprivate (val) copy(ok)
- {
- ok = val == 7;
- val = 9;
- }
-
- }
-
- if (!ok)
- return 1;
- if(val != 7)
- return 1;
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
new file mode 100644
index 0000000..d8ab958
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-1.c
@@ -0,0 +1,48 @@
+#include <assert.h>
+
+#define N 100
+
+void
+test (int *a, int *b, int sarg)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ assert (a[i] == b[i] + sarg);
+}
+
+int
+main ()
+{
+ int a[N], b[N];
+ int i;
+
+ for (i = 0; i < N; i++)
+ b[i] = i+1;
+
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = b[i] + 0;
+
+ test (a, b, 0);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = b[i] + 1;
+
+ test (a, b, 1);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = b[i] + 5;
+
+ test (a, b, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = b[i] + 20;
+
+ test (a, b, 20);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
new file mode 100644
index 0000000..ce9632c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
@@ -0,0 +1,100 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+/* This code uses nvptx inline assembly guarded with acc_on_device, which is
+ not optimized away at -O0, and then confuses the target assembler.
+ { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <assert.h>
+#include <openacc.h>
+
+#define N 100
+
+#define GANG_ID(I) \
+ (acc_on_device (acc_device_nvidia) \
+ ? ({unsigned __r; \
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \
+ __r; }) : (I))
+/* Assert each SARG-sized chunk I of A holds I % NUM_GANGS; void, as nothing is returned.  */
+void
+test_static(int *a, int num_gangs, int sarg)
+{
+ int i, j;
+
+ if (sarg == 0)
+ sarg = 1; /* Treat 0 as chunk size 1 so N / sarg below cannot divide by zero.  */
+
+ for (i = 0; i < N / sarg; i++)
+ for (j = 0; j < sarg; j++)
+ assert (a[i*sarg+j] == i % num_gangs);
+}
+/* Assert each run of GANGS consecutive elements of A holds its run index; void, as nothing is returned.  */
+void
+test_nonstatic(int *a, int gangs)
+{
+ int i, j;
+
+ for (i = 0; i < N; i+=gangs)
+ for (j = 0; j < gangs; j++)
+ assert (a[i+j] == i/gangs); /* I advances by GANGS, so I / GANGS is the run index.  */
+}
+
+int
+main ()
+{
+ int a[N];
+ int i, x;
+
+#pragma acc parallel loop gang (static:*) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_nonstatic (a, 10);
+
+#pragma acc parallel loop gang (static:1) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 1);
+
+#pragma acc parallel loop gang (static:2) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 2);
+
+#pragma acc parallel loop gang (static:5) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 5);
+
+#pragma acc parallel loop gang (static:20) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 20);
+
+ /* Non-static gang. */
+#pragma acc parallel loop gang num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_nonstatic (a, 10);
+
+ /* Static arguments with a variable expression. */
+
+ x = 20;
+#pragma acc parallel loop gang (static:0+x) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 20);
+
+ x = 20;
+#pragma acc parallel loop gang (static:x) num_gangs (10)
+ for (i = 0; i < 100; i++)
+ a[i] = GANG_ID (i);
+
+ test_static (a, 10, 20);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
index 6aa3bb7..5398905 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/if-1.c
@@ -1,5 +1,3 @@
-/* { dg-do run } */
-
#include <openacc.h>
#include <stdlib.h>
#include <stdbool.h>
@@ -608,5 +606,357 @@ main(int argc, char **argv)
abort ();
#endif
+ for (i = 0; i < N; i++)
+ a[i] = 4.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 5.0;
+#else
+ exp = 4.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels if(0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 17.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 8.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 9.0;
+#else
+ exp = 8.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+#pragma acc kernels if(zero)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 23.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 16.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(true)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 76.0;
+
+#pragma acc kernels if(false)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 77.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 22.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 18.0;
+
+ n = 0;
+
+#pragma acc kernels if(n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 19.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 49.0;
+
+ n = 1;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 50.0;
+#else
+ exp = 49.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 38.0;
+
+ n = 0;
+
+#pragma acc kernels if(n + n)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 39.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 91.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(-2)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 92.0;
+#else
+ exp = 91.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 43.0;
+
+#pragma acc kernels copyin(a[0:N]) copyout(b[0:N]) if(one == 1)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+#if ACC_MEM_SHARED
+ exp = 44.0;
+#else
+ exp = 43.0;
+#endif
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != exp)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ a[i] = 87.0;
+
+#pragma acc kernels if(one == 0)
+ {
+ int ii;
+
+ for (ii = 0; ii < N; ii++)
+ {
+ if (acc_on_device (acc_device_host))
+ b[ii] = a[ii] + 1;
+ else
+ b[ii] = a[ii];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (b[i] != 88.0)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = 3.0;
+ b[i] = 9.0;
+ }
+
+#if ACC_MEM_SHARED
+ exp = 0.0;
+ exp2 = 0.0;
+#else
+ acc_map_data (a, d_a, N * sizeof (float));
+ acc_map_data (b, d_b, N * sizeof (float));
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
deleted file mode 100644
index 3acfdf5..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-1.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* { dg-do run } */
-
-#include <stdlib.h>
-
-int i;
-
-int main (void)
-{
- int j, v;
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != -1 || j != -2)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyin (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (i != 2 || j != 1)
- abort ();
-#else
- if (i != -1 || j != -2)
- abort ();
-#endif
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copyout (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_copy (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present_or_create (i, j)
- {
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1)
- abort ();
-#if ACC_MEM_SHARED
- if (i != 2 || j != 1)
- abort ();
-#else
- if (i != -1 || j != -2)
- abort ();
-#endif
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v) present (i, j)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#endif
-
-#if 0
- i = -1;
- j = -2;
- v = 0;
-#pragma acc kernels /* copyout */ present_or_copyout (v)
- {
- if (i != -1 || j != -2)
- abort ();
- i = 2;
- j = 1;
- if (i != 2 || j != 1)
- abort ();
- v = 1;
- }
- if (v != 1 || i != 2 || j != 1)
- abort ();
-#endif
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c
new file mode 100644
index 0000000..2c42497
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-clauses.c
@@ -0,0 +1,62 @@
+/* Exercise the auto, independent and seq loop clauses inside kernels
+ regions. */
+
+#include <assert.h>
+
+#define N 100
+
+/* Assert that the first N elements of A and B are pairwise equal. */
+void
+check (int *a, int *b)
+{
+ int idx = 0;
+
+ while (idx < N)
+ {
+ assert (a[idx] == b[idx]);
+ idx++;
+ }
+}
+
+/* Fill A inside four kernels regions, each using a different combination of
+ data clause and loop clause, and compare A against the host-initialised
+ reference B after every region. */
+int
+main ()
+{
+ int i, a[N], b[N];
+
+ /* copy + loop auto. */
+#pragma acc kernels copy(a)
+ {
+#pragma acc loop auto
+ for (i = 0; i < N; i++)
+ a[i] = i;
+ }
+
+ /* Reference values, computed on the host. */
+ for (i = 0; i < N; i++)
+ b[i] = i;
+
+ check (a, b);
+
+ /* copyout + loop independent. */
+#pragma acc kernels copyout(a)
+ {
+#pragma acc loop independent
+ for (i = 0; i < N; i++)
+ a[i] = i;
+ }
+
+ check (a, b);
+
+ /* present_or_copy + loop seq. */
+#pragma acc kernels present_or_copy(a)
+ {
+#pragma acc loop seq
+ for (i = 0; i < N; i++)
+ a[i] = i;
+ }
+
+ check (a, b);
+
+ /* pcopyout/present_or_copyin + loop seq; A is copied from B this time. */
+#pragma acc kernels pcopyout(a) present_or_copyin(b)
+ {
+#pragma acc loop seq
+ for (i = 0; i < N; i++)
+ a[i] = b[i];
+ }
+
+ check (a, b);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
new file mode 100644
index 0000000..2394ac8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/mode-transitions.c
@@ -0,0 +1,895 @@
+/* Miscellaneous test cases for gang/worker/vector mode transitions. */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <math.h>
+#include <openacc.h>
+
+
+/* Test basic vector-partitioned mode transitions. N is incremented once
+ before and once after a vector loop that increments each element of ARR,
+ so N must end up as 2 and every element of ARR as 1. */
+
+void t1()
+{
+ int n = 0, arr[32], i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(1) num_workers(1) vector_length(32)
+ {
+ int j;
+ n++;
+ #pragma acc loop vector
+ for (j = 0; j < 32; j++)
+ arr[j]++;
+ n++;
+ }
+
+ assert (n == 2);
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 1);
+}
+
+
+/* Test vector-partitioned, gang-partitioned mode. Each element of N is
+ incremented by the gang loops before and after a nested gang/vector loop
+ (expected value 2); the nested loop increments each element of ARR once
+ (expected value 1). */
+
+void t2()
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(32) num_workers(1) vector_length(32)
+ {
+ int j, k;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == 1);
+}
+
+
+/* Test conditions inside vector-partitioned loops. ARR starts out holding
+ its own indices; the vector loop doubles the odd values and leaves the
+ even ones alone. The surrounding gang loops each increment N once per
+ element, as in t2. */
+
+void t4()
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(32) num_workers(1) vector_length(32)
+ {
+ int j, k;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ if ((arr[j * 32 + k] % 2) != 0)
+ arr[j * 32 + k] *= 2;
+ }
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
+}
+
+
+/* Test conditions inside gang-partitioned/vector-partitioned loops. Same
+ data and expected results as t4, but the conditional update is done in a
+ single loop carrying both the gang and vector clauses. */
+
+void t5()
+{
+ int n[32], arr[1024], i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(32) num_workers(1) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+
+ #pragma acc loop gang vector
+ for (j = 0; j < 1024; j++)
+ if ((arr[j] % 2) != 0)
+ arr[j] *= 2;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 32; j++)
+ n[j]++;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 2);
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == ((i % 2) == 0 ? i : i * 2));
+}
+
+
+/* Test trivial operation of vector-single mode: a lone increment of N in a
+ parallel region with one gang and one worker must be applied exactly
+ once. */
+
+void t7()
+{
+ int n = 0;
+ #pragma acc parallel copy(n) \
+ num_gangs(1) num_workers(1) vector_length(32)
+ {
+ n++;
+ }
+ assert (n == 1);
+}
+
+
+/* Test vector-single, gang-partitioned mode. The test is repeated with the
+ requested number of gangs doubling from 1 to 1024; each run must increment
+ every element of ARR exactly once. */
+
+void t8()
+{
+ int arr[1024];
+ int gangs;
+
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(gangs) num_workers(1) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ arr[j]++;
+ }
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == 1);
+ }
+}
+
+
+/* Test conditions in vector-single mode. The test is repeated with the
+ requested number of gangs doubling from 1 to 1024. Starting from zero,
+ elements whose index is a multiple of 3 are incremented by 1 and all
+ others by 2. */
+
+void t9()
+{
+ int arr[1024];
+ int gangs;
+
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(gangs) num_workers(1) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ if ((j % 3) == 0)
+ arr[j]++;
+ else
+ arr[j] += 2;
+ }
+
+ /* The conditional expression must be parenthesized as a whole: "=="
+ binds tighter than "?:", so without the parentheses the assertion
+ would evaluate to 1 or 2 and could never fail. */
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (((i % 3) == 0) ? 1 : 2));
+ }
+}
+
+
+/* Test switch in vector-single mode. The test is repeated with the
+ requested number of gangs doubling from 1 to 1024. Starting from zero,
+ element J is incremented by (J % 5) + 1; the default case can never be
+ selected and would make the check fail if it were. */
+
+void t10()
+{
+ int arr[1024];
+ int gangs;
+
+ for (gangs = 1; gangs <= 1024; gangs <<= 1)
+ {
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(gangs) num_workers(1) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 1024; j++)
+ switch (j % 5)
+ {
+ case 0: arr[j] += 1; break;
+ case 1: arr[j] += 2; break;
+ case 2: arr[j] += 3; break;
+ case 3: arr[j] += 4; break;
+ case 4: arr[j] += 5; break;
+ default: arr[j] += 99;
+ }
+ }
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (i % 5) + 1);
+ }
+}
+
+
+/* Test switch in vector-single mode, initialise array on device. The host
+ fills ARR with 99; the first device loop resets it to zero and the second
+ adds (J % 5) + 1 to element J, so a stale 99 would make the final check
+ fail. */
+
+void t11()
+{
+ int arr[1024];
+ int i;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = 99;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(1024) num_workers(1) vector_length(32)
+ {
+ int j;
+
+ /* This loop and the one following must be distributed to available gangs
+ in the same way to ensure data dependencies are not violated (hence the
+ "static" clauses). */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 1024; j++)
+ arr[j] = 0;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < 1024; j++)
+ switch (j % 5)
+ {
+ case 0: arr[j] += 1; break;
+ case 1: arr[j] += 2; break;
+ case 2: arr[j] += 3; break;
+ case 3: arr[j] += 4; break;
+ case 4: arr[j] += 5; break;
+ default: arr[j] += 99;
+ }
+ }
+
+ for (i = 0; i < 1024; i++)
+ assert (arr[i] == (i % 5) + 1);
+}
+
+
+/* Test multiple conditions in vector-single mode. For each index exactly
+ one of three flags may be set: FIZZBUZZ for multiples of both 3 and 5,
+ FIZZ for multiples of 3 only, BUZZ for multiples of 5 only. All three
+ arrays are cleared on the device before the flags are computed. */
+
+#define NUM_GANGS 4096
+void t12()
+{
+ bool fizz[NUM_GANGS], buzz[NUM_GANGS], fizzbuzz[NUM_GANGS];
+ int i;
+
+ #pragma acc parallel copyout(fizz, buzz, fizzbuzz) \
+ num_gangs(NUM_GANGS) num_workers(1) vector_length(32)
+ {
+ int j;
+
+ /* This loop and the one following must be distributed to available gangs
+ in the same way to ensure data dependencies are not violated (hence the
+ "static" clauses). */
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < NUM_GANGS; j++)
+ fizz[j] = buzz[j] = fizzbuzz[j] = 0;
+
+ #pragma acc loop gang(static:*)
+ for (j = 0; j < NUM_GANGS; j++)
+ {
+ if ((j % 3) == 0 && (j % 5) == 0)
+ fizzbuzz[j] = 1;
+ else
+ {
+ if ((j % 3) == 0)
+ fizz[j] = 1;
+ else if ((j % 5) == 0)
+ buzz[j] = 1;
+ }
+ }
+ }
+
+ for (i = 0; i < NUM_GANGS; i++)
+ {
+ assert (fizzbuzz[i] == ((i % 3) == 0 && (i % 5) == 0));
+ assert (fizz[i] == ((i % 3) == 0 && (i % 5) != 0));
+ assert (buzz[i] == ((i % 3) != 0 && (i % 5) == 0));
+ }
+}
+#undef NUM_GANGS
+
+
+/* Test worker-partitioned/vector-single mode. A gang loop over 32 rows
+ contains a worker loop over 8 columns; each zero-initialised element has
+ its own flat index added to it, so ARR[I] must equal I afterwards. */
+
+void t13()
+{
+ int arr[32 * 8], i;
+
+ for (i = 0; i < 32 * 8; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ #pragma acc loop worker
+ for (k = 0; k < 8; k++)
+ arr[j * 8 + k] += j * 8 + k;
+ }
+ }
+
+ for (i = 0; i < 32 * 8; i++)
+ assert (arr[i] == i);
+}
+
+
+/* Test worker-single/worker-partitioned transitions. Inside each gang-loop
+ iteration, four increments of N[J] alternate with three worker loops that
+ each increment one row of ARR, so every N[J] must end up as 4 and every
+ element of ARR as 3. */
+
+void t16()
+{
+ int n[32], arr[32 * 32], i;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = 0;
+
+ for (i = 0; i < 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy(n, arr) \
+ num_gangs(8) num_workers(16) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ n[j]++;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+
+ n[j]++;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+
+ n[j]++;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr[j * 32 + k]++;
+
+ n[j]++;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (n[i] == 4);
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == 3);
+}
+
+
+/* Test correct synchronisation between worker-partitioned loops. The test
+ is repeated for every power-of-two combination of 1..32 workers and 1..32
+ gangs. Within each row of 32 elements, three consecutive worker loops
+ copy between ARR_A and ARR_B, each reversing the row and doubling the
+ values; every loop reads what the previous one wrote. After three passes
+ the row is reversed once overall and scaled by 8, hence the expected
+ value (I ^ 31) * 8. */
+
+void t17()
+{
+ int arr_a[32 * 32], arr_b[32 * 32], i;
+ int num_workers, num_gangs;
+
+ for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
+ for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
+ {
+ for (i = 0; i < 32 * 32; i++)
+ arr_a[i] = i;
+
+ #pragma acc parallel copyin(arr_a) copyout(arr_b) \
+ num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr_a[j * 32 + (31 - k)] = arr_b[j * 32 + k] * 2;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ arr_b[j * 32 + (31 - k)] = arr_a[j * 32 + k] * 2;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr_b[i] == (i ^ 31) * 8);
+ }
+}
+
+
+/* Test correct synchronisation between worker+vector-partitioned loops.
+ Same scheme as t17, but each gang-loop iteration handles a block of 1024
+ elements with loops carrying both the worker and vector clauses. Three
+ reversing and doubling passes leave each block reversed once and scaled
+ by 8, hence the expected value (I ^ 1023) * 8. */
+
+void t18()
+{
+ int arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
+ int num_workers, num_gangs;
+
+ for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
+ for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
+ {
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr_a[i] = i;
+
+ #pragma acc parallel copyin(arr_a) copyout(arr_b) \
+ num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop worker vector
+ for (k = 0; k < 32 * 32; k++)
+ arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
+
+ #pragma acc loop worker vector
+ for (k = 0; k < 32 * 32; k++)
+ arr_a[j * 32 * 32 + (1023 - k)] = arr_b[j * 32 * 32 + k] * 2;
+
+ #pragma acc loop worker vector
+ for (k = 0; k < 32 * 32; k++)
+ arr_b[j * 32 * 32 + (1023 - k)] = arr_a[j * 32 * 32 + k] * 2;
+ }
+ }
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ assert (arr_b[i] == (i ^ 1023) * 8);
+ }
+}
+
+
+/* Test correct synchronisation between vector-partitioned loops in
+ worker-partitioned mode. The test is repeated for every power-of-two
+ combination of 1..32 workers and 1..32 gangs.
+
+ Each worker-loop iteration runs three vector loops over one group of 32
+ elements. Every loop reverses the group while copying between ARR_A and
+ ARR_B and multiplies by a factor chosen from the source index: 2 or 3 in
+ the first loop, 5 or 7 in the second, 3 or 2 in the third. The first and
+ third factors are selected by the same parity test with swapped values,
+ so together they always contribute 6; the group ends up reversed once
+ overall. N counts the two statements executed outside the vector
+ loops. */
+
+void t19()
+{
+ int n[32 * 32], arr_a[32 * 32 * 32], arr_b[32 * 32 * 32], i;
+ int num_workers, num_gangs;
+
+ for (num_workers = 1; num_workers <= 32; num_workers <<= 1)
+ for (num_gangs = 1; num_gangs <= 32; num_gangs <<= 1)
+ {
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr_a[i] = i;
+
+ for (i = 0; i < 32 * 32; i++)
+ n[i] = 0;
+
+ #pragma acc parallel copy (n) copyin(arr_a) copyout(arr_b) \
+ num_gangs(num_gangs) num_workers(num_workers) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop worker
+ for (k = 0; k < 32; k++)
+ {
+ int m;
+
+ n[j * 32 + k]++;
+
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ {
+ if (((j * 1024 + k * 32 + m) % 2) == 0)
+ arr_b[j * 1024 + k * 32 + (31 - m)]
+ = arr_a[j * 1024 + k * 32 + m] * 2;
+ else
+ arr_b[j * 1024 + k * 32 + (31 - m)]
+ = arr_a[j * 1024 + k * 32 + m] * 3;
+ }
+
+ /* Test returning to vector-single mode... */
+ n[j * 32 + k]++;
+
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ {
+ if (((j * 1024 + k * 32 + m) % 3) == 0)
+ arr_a[j * 1024 + k * 32 + (31 - m)]
+ = arr_b[j * 1024 + k * 32 + m] * 5;
+ else
+ arr_a[j * 1024 + k * 32 + (31 - m)]
+ = arr_b[j * 1024 + k * 32 + m] * 7;
+ }
+
+ /* ...and back-to-back vector loops. */
+
+ #pragma acc loop vector
+ for (m = 0; m < 32; m++)
+ {
+ if (((j * 1024 + k * 32 + m) % 2) == 0)
+ arr_b[j * 1024 + k * 32 + (31 - m)]
+ = arr_a[j * 1024 + k * 32 + m] * 3;
+ else
+ arr_b[j * 1024 + k * 32 + (31 - m)]
+ = arr_a[j * 1024 + k * 32 + m] * 2;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (n[i] == 2);
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ {
+ /* 6 comes from the first and third loops; 5 or 7 from the second,
+ whose source index is the final destination index I. */
+ int m = 6 * ((i % 3) == 0 ? 5 : 7);
+ assert (arr_b[i] == (i ^ 31) * m);
+ }
+ }
+}
+
+
+/* With -O0, variables are on the stack, not in registers. Check that worker
+ state propagation handles the stack frame. INTERNAL is set to 100 before
+ the worker loop; each of the eight iterations stores it into a different
+ one of w0..w7, so all eight must read back as 100. */
+
+void t20()
+{
+ int w0 = 0;
+ int w1 = 0;
+ int w2 = 0;
+ int w3 = 0;
+ int w4 = 0;
+ int w5 = 0;
+ int w6 = 0;
+ int w7 = 0;
+
+ int i;
+
+#pragma acc parallel copy (w0, w1, w2, w3, w4, w5, w6, w7) \
+ num_gangs (1) num_workers (8)
+ {
+ int internal = 100;
+
+#pragma acc loop worker
+ for (i = 0; i < 8; i++)
+ {
+ switch (i)
+ {
+ case 0: w0 = internal; break;
+ case 1: w1 = internal; break;
+ case 2: w2 = internal; break;
+ case 3: w3 = internal; break;
+ case 4: w4 = internal; break;
+ case 5: w5 = internal; break;
+ case 6: w6 = internal; break;
+ case 7: w7 = internal; break;
+ default: break;
+ }
+ }
+ }
+
+ if (w0 != 100
+ || w1 != 100
+ || w2 != 100
+ || w3 != 100
+ || w4 != 100
+ || w5 != 100
+ || w6 != 100
+ || w7 != 100)
+ __builtin_abort ();
+}
+
+
+/* Test worker-single/vector-single mode. Eight workers are requested but
+ the only loop is gang-partitioned; every element of ARR must be
+ incremented exactly once. */
+
+void t21()
+{
+ int arr[32], i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ arr[j]++;
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 1);
+}
+
+
+/* Test worker-single/vector-single mode with an atomic update. Same as
+ t21, except that the increment is wrapped in an "acc atomic"
+ construct. */
+
+void t22()
+{
+ int arr[32], i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 0;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc atomic
+ arr[j]++;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 1);
+}
+
+
+/* Test condition in worker-single/vector-single mode. ARR starts out
+ holding its own indices; odd values are incremented by 1 and even values
+ by 2. */
+
+void t23()
+{
+ int arr[32], i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ if ((arr[j] % 2) != 0)
+ arr[j]++;
+ else
+ arr[j] += 2;
+ }
+
+ /* The conditional expression must be parenthesized as a whole: "==" binds
+ tighter than "?:", so without the parentheses the assertion would
+ evaluate to I + 1 or I + 2 and could never fail. */
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == (((i % 2) != 0) ? i + 1 : i + 2));
+}
+
+
+/* Test switch in worker-single/vector-single mode. ARR starts out holding
+ its own indices; the switch adds (value % 5) + 1 to each element, so
+ ARR[I] must end up as I + (I % 5) + 1. The default case can never be
+ selected and would make the check fail if it were. */
+
+void t24()
+{
+ int arr[32], i;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ switch (arr[j] % 5)
+ {
+ case 0: arr[j] += 1; break;
+ case 1: arr[j] += 2; break;
+ case 2: arr[j] += 3; break;
+ case 3: arr[j] += 4; break;
+ case 4: arr[j] += 5; break;
+ default: arr[j] += 99;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == i + (i % 5) + 1);
+}
+
+
+/* Test worker-single/vector-partitioned mode. A gang loop contains a
+ vector loop with no worker loop in between; each element of ARR, which
+ starts out holding its own index, is atomically incremented once. */
+
+void t25()
+{
+ int arr[32 * 32], i;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) \
+ num_gangs(8) num_workers(8) vector_length(32)
+ {
+ int j;
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ {
+ #pragma acc atomic
+ arr[j * 32 + k]++;
+ }
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + 1);
+}
+
+
+/* Test worker-single, vector-partitioned, gang-redundant mode. The region
+ contains no gang loop; N is atomically incremented twice and each element
+ of ARR once. The expected totals are scaled by M, which is ACTUAL_GANGS
+ when the region reports (through ONDEV) that it did not run on the host,
+ and 1 otherwise. */
+
+#define ACTUAL_GANGS 8
+void t27()
+{
+ int n, arr[32], i;
+ int ondev;
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 0;
+
+ n = 0;
+
+ #pragma acc parallel copy(n, arr) copyout(ondev) \
+ num_gangs(ACTUAL_GANGS) num_workers(8) vector_length(32)
+ {
+ int j;
+
+ ondev = acc_on_device (acc_device_not_host);
+
+ #pragma acc atomic
+ n++;
+
+ #pragma acc loop vector
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc atomic
+ arr[j] += 1;
+ }
+
+ #pragma acc atomic
+ n++;
+ }
+
+ /* Number of times each statement of the region is expected to have been
+ executed. */
+ int m = ondev ? ACTUAL_GANGS : 1;
+
+ assert (n == m * 2);
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == m);
+}
+#undef ACTUAL_GANGS
+
+
+/* Check if worker-single variables get broadcast to vectors. */
+
+/* Helper for t28: returns the constant 2.71. It is marked as an OpenACC
+ routine and is called both inside t28's parallel region and from its
+ host-side check. */
+#pragma acc routine
+float t28_routine ()
+{
+ return 2.71;
+}
+
+/* VAL is computed once, outside the vector loop, from t28_routine; every
+ iteration of the vector loop then reads VAL and V1. The host recomputes
+ the same expression and accepts an absolute difference below 0.0001. */
+#define N 32
+void t28()
+{
+ float threads[N], v1 = 3.14;
+
+ /* Sentinel values, so that an element left unwritten fails the check. */
+ for (int i = 0; i < N; i++)
+ threads[i] = -1;
+
+#pragma acc parallel num_gangs (1) vector_length (32) copy (v1)
+ {
+ float val = t28_routine ();
+
+#pragma acc loop vector
+ for (int i = 0; i < N; i++)
+ threads[i] = val + v1*i;
+ }
+
+ for (int i = 0; i < N; i++)
+ assert (fabs (threads[i] - (t28_routine () + v1*i)) < 0.0001);
+}
+#undef N
+
+
+/* Run every test in sequence. Each test aborts the program (through
+ assert or __builtin_abort) on failure, so reaching the return means all
+ of them passed. The numbering has gaps: there is no t3, t6, t14, t15 or
+ t26 in this file. */
+int main()
+{
+ t1();
+ t2();
+ t4();
+ t5();
+ t7();
+ t8();
+ t9();
+ t10();
+ t11();
+ t12();
+ t13();
+ t16();
+ t17();
+ t18();
+ t19();
+ t20();
+ t21();
+ t22();
+ t23();
+ t24();
+ t25();
+ t27();
+ t28();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
new file mode 100644
index 0000000..53f03d1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/private-variables.c
@@ -0,0 +1,953 @@
+#include <assert.h>
+#include <openacc.h>
+
+/* Small two-int aggregate; used as a worker-private local variable in
+ local_w_3 and local_w_4. */
+typedef struct {
+ int x, y;
+} vec2;
+
+/* Larger aggregate with three ints and an embedded array; used as a
+ gang-private variable in loop_g_6. */
+typedef struct {
+ int x, y, z;
+ int attr[13];
+} vec3_attr;
+
+
+/* Test of gang-private variables declared in local scope with parallel
+ directive. X is declared inside the parallel region, assigned in a first
+ gang loop and read in a second one; both loops use gang(static:1). Each
+ element of ARR starts at 3 and must end up as 3 + I * 2. */
+
+void local_g_1()
+{
+ int i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 3;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ int x;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < 32; i++)
+ x = i * 2;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < 32; i++)
+ {
+ /* When executing on the host, X is recomputed for this iteration
+ instead of relying on the value left by the first loop. */
+ if (acc_on_device (acc_device_host))
+ x = i * 2;
+ arr[i] += x;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 3 + i * 2);
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops. Each worker loop
+ declares its own X (I ^ J * 3 in the first, I | J * 5 in the second) and
+ its vector loop adds X * K to the element; ARR starts out holding its own
+ indices. */
+
+void local_w_1()
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Successive vector loops. A single X is
+ reassigned between two vector loops inside the same worker-loop
+ iteration; the expected result is the same as in local_w_1. */
+
+void local_w_2()
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ x = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Aggregate worker variable. Both multipliers
+ are stored in the fields of a vec2 before the two vector loops read them;
+ the expected result is the same as in local_w_1. */
+
+void local_w_3()
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt;
+
+ pt.x = i ^ j * 3;
+ pt.y = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Addressable worker variable. The vec2 is
+ accessed both directly and through the pointer PTP: X is written directly
+ and read through the pointer, Y is written through the pointer and read
+ directly. The expected result is the same as in local_w_1. */
+
+void local_w_4()
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ vec2 pt, *ptp;
+
+ ptp = &pt;
+
+ pt.x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+ ptp->y = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+ to vector-partitioned mode. Array worker variable. The two multipliers
+ live in the elements of a two-element local array, each assigned just
+ before the vector loop that reads it. The expected result is the same as
+ in local_w_1. */
+
+void local_w_5()
+{
+ int i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int pt[2];
+
+ pt[0] = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of gang-private variables declared on loop directive. X is listed
+ in a private clause on the gang loop and assigned before use in every
+ iteration; ARR starts out holding its own indices and has I * 2 added, so
+ ARR[I] must end up as I * 3. */
+
+void loop_g_1()
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+ arr[i] += x;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == i * 3);
+}
+
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned workers. X is assigned once per gang-loop iteration and
+ read by every iteration of the nested worker loop, which adds it to one
+ row of 32 elements; I / 32 in the check recovers the row number. */
+
+void loop_g_2()
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+ to partitioned vectors. Same as loop_g_2, except that the nested loop
+ carries a vector clause instead of a worker clause. */
+
+void loop_g_3()
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ x = i * 2;
+
+ #pragma acc loop vector
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private addressable variable declared on loop directive, with
+ broadcasting to partitioned workers.
+
+ Taking the address of 'x' and modifying it through the pointer is what makes
+ the variable addressable. The decrement happens after the worker loop and
+ 'x' is reassigned at the top of every gang iteration, so it does not show up
+ in the expected values. */
+
+void loop_g_4()
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ int *p = &x; /* Points at the privatized 'x'. */
+
+ x = i * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x;
+
+ (*p)--; /* Write through the pointer; the result is never read back. */
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 2);
+}
+
+
+/* Test of gang-private array variable declared on loop directive, with
+ broadcasting to partitioned workers.
+
+ Each gang iteration fills the private 8-element array with x[j] = j * 2 in
+ a plain (unpartitioned) loop; the worker loop then adds x[j % 8]. */
+
+void loop_g_5()
+{
+ int x[8], i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(x)
+ for (i = 0; i < 32; i++)
+ {
+ for (int j = 0; j < 8; j++)
+ x[j] = j * 2;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += x[j % 8];
+ }
+ }
+
+ /* 32 is a multiple of 8, so (i * 32 + j) % 8 == j % 8 and the flat index can
+ be used directly to recompute the addend. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i % 8) * 2);
+}
+
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+ broadcasting to partitioned workers.
+
+ 'vec3_attr' is declared outside this function; the fields used here are
+ x, y, z and the array member attr. The four values written per gang
+ iteration sum to i + 2i + 4i + 6i = 13 * i. */
+
+void loop_g_6()
+{
+ int i, arr[32 * 32];
+ vec3_attr pt;
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang private(pt)
+ for (i = 0; i < 32; i++)
+ {
+ pt.x = i;
+ pt.y = i * 2;
+ pt.z = i * 4;
+ pt.attr[5] = i * 6;
+
+ #pragma acc loop worker
+ for (int j = 0; j < 32; j++)
+ arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (i / 32) * 13); /* i / 32 is the gang iteration. */
+}
+
+
+/* Test of vector-private variables declared on loop directive.
+
+ Two consecutive vector loops each privatize 'x'. Note that '*' binds
+ tighter than '^' and '|', so the values are i ^ (j * 3) and i | (j * 5). */
+
+void loop_v_1()
+{
+ int x, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector private(x)
+ for (k = 0; k < 32; k++)
+ {
+ x = i ^ j * 3;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop vector private(x)
+ for (k = 0; k < 32; k++)
+ {
+ x = i | j * 5;
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of vector-private variables declared on loop directive. Array type.
+
+ A single vector loop privatizes the two-element array 'pt', writes both
+ elements and uses both in the same iteration. */
+
+void loop_v_2()
+{
+ int pt[2], i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ #pragma acc loop vector private(pt)
+ for (k = 0; k < 32; k++)
+ {
+ pt[0] = i ^ j * 3; /* i ^ (j * 3) */
+ pt[1] = i | j * 5; /* i | (j * 5) */
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on a loop directive.
+
+ Element i * 32 + j has i ^ (j * 3) added to it; the host check recovers
+ i and j from the flat index as index / 32 and index % 32. */
+
+void loop_w_1()
+{
+ int x = 5, i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ x = i ^ j * 3;
+ /* Try to ensure that accesses to 'x' don't get optimized into a
+ temporary. */
+ __asm__ __volatile__ ("");
+ arr[i * 32 + j] += x;
+ }
+ }
+ }
+
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode.
+
+ 'x' is assigned in the worker loop body, outside the vector loop, and read
+ inside the vector loop. */
+
+void loop_w_2()
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3; /* i ^ (j * 3) */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Recompute the contribution on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Back-to-back worker loops.
+
+ Two worker loops follow each other inside the same gang iteration; each
+ privatizes 'x' and adds its own contribution to the same elements. */
+
+void loop_w_3()
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3; /* First contribution: i ^ (j * 3). */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i | j * 5; /* Second contribution: i | (j * 5). */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Successive vector loops.
+
+ Within one worker iteration 'x' is assigned, read by a vector loop,
+ reassigned, and read by a second vector loop. */
+
+void loop_w_4()
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ x = i ^ j * 3; /* Value for the first vector loop. */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ x = i | j * 5; /* New value for the second vector loop. */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Addressable worker variable.
+
+ The first value is stored to 'x' directly, the second through a pointer to
+ it; both vector loops read 'x' by name. */
+
+void loop_w_5()
+{
+ int x = 5, i, arr[32 * 32 * 32];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(x)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+ int *p = &x; /* Taking the address makes 'x' addressable. */
+
+ x = i ^ j * 3;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+
+ *p = i | j * 5; /* Updates 'x' through the pointer. */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += x * k;
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+ to vector-partitioned mode. Aggregate worker variable.
+
+ 'vec2' is declared outside this function; the fields used here are x and y.
+ Both fields are written before the first vector loop; the first loop reads
+ pt.x and the second reads pt.y. */
+
+void loop_w_6()
+{
+ int i, arr[32 * 32 * 32];
+ vec2 pt;
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ #pragma acc loop worker private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt.x = i ^ j * 3; /* i ^ (j * 3) */
+ pt.y = i | j * 5; /* i | (j * 5) */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.x * k;
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt.y * k;
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of worker-private variables declared on loop directive, broadcasting
+ to vector-partitioned mode. Array worker variable.
+
+ pt[0] is written before the first vector loop and pt[1] between the two
+ vector loops; each vector loop reads the element written just before it. */
+
+void loop_w_7()
+{
+ int i, arr[32 * 32 * 32];
+ int pt[2];
+
+ for (i = 0; i < 32 * 32 * 32; i++)
+ arr[i] = i;
+
+ /* "pt" is treated as "present_or_copy" on the parallel directive because it
+ is an array variable. */
+ #pragma acc parallel copy(arr) num_gangs(32) num_workers(32) vector_length(32)
+ {
+ int j;
+
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ /* But here, it is made private per-worker. */
+ #pragma acc loop worker private(pt)
+ for (j = 0; j < 32; j++)
+ {
+ int k;
+
+ pt[0] = i ^ j * 3; /* i ^ (j * 3) */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+ pt[1] = i | j * 5; /* i | (j * 5) */
+
+ #pragma acc loop vector
+ for (k = 0; k < 32; k++)
+ arr[i * 1024 + j * 32 + k] += pt[1] * k;
+ }
+ }
+ }
+
+ /* Recompute both contributions on the host for every (i, j, k). */
+ for (i = 0; i < 32; i++)
+ for (int j = 0; j < 32; j++)
+ for (int k = 0; k < 32; k++)
+ {
+ int idx = i * 1024 + j * 32 + k;
+ assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+ }
+}
+
+
+/* Test of gang-private variables declared on the parallel directive.
+
+ The first gang loop only stores i * 2 into the private 'x'; the second one
+ adds 'x' to arr[i]. NOTE(review): this presumably relies on
+ gang(static:1) with 32 gangs giving iteration i to the same gang in both
+ loops, so that 'x' still holds i * 2 -- confirm against the OpenACC
+ specification. */
+
+void parallel_g_1()
+{
+ int x = 5, i, arr[32];
+
+ for (i = 0; i < 32; i++)
+ arr[i] = 3;
+
+ #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ {
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < 32; i++)
+ x = i * 2;
+
+ #pragma acc loop gang(static:1)
+ for (i = 0; i < 32; i++)
+ {
+ /* When running on the host, recompute 'x' for this iteration instead of
+ relying on the value left behind by the first loop. */
+ if (acc_on_device (acc_device_host))
+ x = i * 2;
+ arr[i] += x;
+ }
+ }
+
+ for (i = 0; i < 32; i++)
+ assert (arr[i] == 3 + i * 2); /* 3 (initial) + i * 2 (added in the region). */
+}
+
+
+/* Test of gang-private array variable declared on the parallel directive.
+
+ Each gang iteration fills the private 32-element array with x[j] = j * 2 in
+ a plain (unpartitioned) loop, then a worker loop adds the elements in
+ reverse order (x[31 - j]) to row i of arr. */
+
+void parallel_g_2()
+{
+ int x[32], i, arr[32 * 32];
+
+ for (i = 0; i < 32 * 32; i++)
+ arr[i] = i;
+
+ #pragma acc parallel private(x) copy(arr) num_gangs(32) num_workers(2) vector_length(32)
+ {
+ #pragma acc loop gang
+ for (i = 0; i < 32; i++)
+ {
+ int j;
+ for (j = 0; j < 32; j++)
+ x[j] = j * 2;
+
+ #pragma acc loop worker
+ for (j = 0; j < 32; j++)
+ arr[i * 32 + j] += x[31 - j];
+ }
+ }
+
+ /* i % 32 is the column j, so the addend is x[31 - j] == (31 - j) * 2. */
+ for (i = 0; i < 32 * 32; i++)
+ assert (arr[i] == i + (31 - (i % 32)) * 2);
+}
+
+
+int main () /* Runs the privatization tests one after another. */
+{
+ local_g_1(); /* The local_* tests are presumably defined earlier in this file. */
+ local_w_1();
+ local_w_2();
+ local_w_3();
+ local_w_4();
+ local_w_5();
+ loop_g_1(); /* The loop_* and parallel_* tests report failure through assert. */
+ loop_g_2();
+ loop_g_3();
+ loop_g_4();
+ loop_g_5();
+ loop_g_6();
+ loop_v_1();
+ loop_v_2();
+ loop_w_1();
+ loop_w_2();
+ loop_w_3();
+ loop_w_4();
+ loop_w_5();
+ loop_w_6();
+ loop_w_7();
+ parallel_g_1();
+ parallel_g_2();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
new file mode 100644
index 0000000..b23c758
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-7.c
@@ -0,0 +1,129 @@
+/* Tests of reduction on loop directive. */
+
+#include <assert.h>
+
+
+/* Test of reduction on loop directive (gangs, non-private reduction
+   variable).
+
+   Runs a '+' reduction over all 1024 elements and a '*' reduction over a
+   short prefix, and compares each result with the same computation done
+   sequentially on the host.  */
+
+void g_np_1()
+{
+  int i, arr[1024], res = 0, hres = 0;
+
+  for (i = 0; i < 1024; i++)
+    arr[i] = i;
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(+:res)
+    for (i = 0; i < 1024; i++)
+      res += arr[i];
+  }
+
+  for (i = 0; i < 1024; i++)
+    hres += arr[i];
+
+  assert (res == hres);
+
+  res = hres = 1;
+
+  /* Multiply arr[1] .. arr[12] rather than arr[0] .. arr[11]: arr[0] is 0,
+     which would make the expected product trivially 0.  The product of
+     1 .. 12 is 479001600 and still fits in a 32-bit int.  */
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(res)
+  {
+    #pragma acc loop gang reduction(*:res)
+    for (i = 1; i < 13; i++)
+      res *= arr[i];
+  }
+
+  for (i = 1; i < 13; i++)
+    hres *= arr[i];
+
+  assert (res == hres);
+}
+
+
+/* Sum reduction on a loop partitioned across gangs and vectors.  The
+   reduction variable is not private: it is copied in and out of the
+   parallel region.  */
+
+void gv_np_1()
+{
+  enum { COUNT = 1024 };
+  int idx, data[COUNT];
+  int region_sum = 0, host_sum = 0;
+
+  /* Fill the input and build the host-side reference in a single pass.  */
+  for (idx = 0; idx < COUNT; idx++)
+    {
+      data[idx] = idx;
+      host_sum = host_sum + data[idx];
+    }
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(region_sum)
+  {
+    #pragma acc loop gang vector reduction(+:region_sum)
+    for (idx = 0; idx < COUNT; idx++)
+      region_sum += data[idx];
+  }
+
+  assert (region_sum == host_sum);
+}
+
+
+/* Sum reduction on a loop partitioned across gangs and workers.  The
+   reduction variable is not private: it is copied in and out of the
+   parallel region.  */
+
+void gw_np_1()
+{
+  enum { COUNT = 1024 };
+  int idx, data[COUNT];
+  int region_sum = 0, host_sum = 0;
+
+  /* Fill the input and build the host-side reference in a single pass.  */
+  for (idx = 0; idx < COUNT; idx++)
+    {
+      data[idx] = idx;
+      host_sum = host_sum + data[idx];
+    }
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(region_sum)
+  {
+    #pragma acc loop gang worker reduction(+:region_sum)
+    for (idx = 0; idx < COUNT; idx++)
+      region_sum += data[idx];
+  }
+
+  assert (region_sum == host_sum);
+}
+
+
+/* Sum reduction on a loop partitioned across gangs, workers and vectors.
+   The reduction variable is not private: it is copied in and out of the
+   parallel region.  */
+
+void gwv_np_1()
+{
+  enum { COUNT = 1024 };
+  int idx, data[COUNT];
+  int region_sum = 0, host_sum = 0;
+
+  /* Fill the input and build the host-side reference in a single pass.  */
+  for (idx = 0; idx < COUNT; idx++)
+    {
+      data[idx] = idx;
+      host_sum = host_sum + data[idx];
+    }
+
+  #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+		       copy(region_sum)
+  {
+    #pragma acc loop gang worker vector reduction(+:region_sum)
+    for (idx = 0; idx < COUNT; idx++)
+      region_sum += data[idx];
+  }
+
+  assert (region_sum == host_sum);
+}
+
+
+int main() /* Runs the four reduction tests; each reports failure through assert. */
+{
+ g_np_1(); /* gang */
+ gv_np_1(); /* gang + vector */
+ gw_np_1(); /* gang + worker */
+ gwv_np_1(); /* gang + worker + vector */
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
new file mode 100644
index 0000000..f112457
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-1.c
@@ -0,0 +1,88 @@
+// { dg-additional-options "-fno-exceptions" }
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#pragma acc routine
+int fact(int n)
+{
+  /* Recursive factorial: 0 and 1 both yield 1, every other argument
+     recurses on n - 1.  */
+  return (n == 0 || n == 1) ? 1 : n * fact (n - 1);
+}
+
+/* Compute fact (0) .. fact (n - 1) in eight asynchronous parallel loops, one
+   per loop-partitioning variant (seq, gang, worker, vector and their
+   combinations), then check every result against the host.  */
+int main()
+{
+  int *s, *g, *w, *v, *gw, *gv, *wv, *gwv, i, n = 10;
+
+  /* One result buffer per partitioning variant.  */
+  s = (int *) malloc (sizeof (int) * n);
+  g = (int *) malloc (sizeof (int) * n);
+  w = (int *) malloc (sizeof (int) * n);
+  v = (int *) malloc (sizeof (int) * n);
+  gw = (int *) malloc (sizeof (int) * n);
+  gv = (int *) malloc (sizeof (int) * n);
+  wv = (int *) malloc (sizeof (int) * n);
+  gwv = (int *) malloc (sizeof (int) * n);
+
+  /* Fail cleanly instead of dereferencing a null buffer later on.  */
+  if (!s || !g || !w || !v || !gw || !gv || !wv || !gwv)
+    abort ();
+
+#pragma acc parallel loop async copyout(s[0:n]) seq
+  for (i = 0; i < n; i++)
+    s[i] = fact (i);
+
+#pragma acc parallel loop async copyout(g[0:n]) gang
+  for (i = 0; i < n; i++)
+    g[i] = fact (i);
+
+#pragma acc parallel loop async copyout(w[0:n]) worker
+  for (i = 0; i < n; i++)
+    w[i] = fact (i);
+
+#pragma acc parallel loop async copyout(v[0:n]) vector
+  for (i = 0; i < n; i++)
+    v[i] = fact (i);
+
+#pragma acc parallel loop async copyout(gw[0:n]) gang worker
+  for (i = 0; i < n; i++)
+    gw[i] = fact (i);
+
+#pragma acc parallel loop async copyout(gv[0:n]) gang vector
+  for (i = 0; i < n; i++)
+    gv[i] = fact (i);
+
+#pragma acc parallel loop async copyout(wv[0:n]) worker vector
+  for (i = 0; i < n; i++)
+    wv[i] = fact (i);
+
+#pragma acc parallel loop async copyout(gwv[0:n]) gang worker vector
+  for (i = 0; i < n; i++)
+    gwv[i] = fact (i);
+
+  /* All regions above were launched with 'async'; wait before reading.  */
+#pragma acc wait
+
+  /* The sequential variant is checked against the host ...  */
+  for (i = 0; i < n; i++)
+    if (s[i] != fact (i))
+      abort ();
+  /* ... and every other variant against the sequential one.  */
+  for (i = 0; i < n; i++)
+    if (g[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (w[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (v[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (gw[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (gv[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (wv[i] != s[i])
+      abort ();
+  for (i = 0; i < n; i++)
+    if (gwv[i] != s[i])
+      abort ();
+
+  /* Release the result buffers.  */
+  free (s);
+  free (g);
+  free (w);
+  free (v);
+  free (gw);
+  free (gv);
+  free (wv);
+  free (gwv);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
new file mode 100644
index 0000000..d6ff44d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-4.c
@@ -0,0 +1,123 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+#define M 8
+#define N 32
+
+#pragma acc routine vector
+void
+vector (int *a)
+{
+  int lane;
+
+  /* Subtract each of the first N elements from itself, leaving zeros.  */
+#pragma acc loop vector
+  for (lane = 0; lane < N; lane++)
+    a[lane] = a[lane] - a[lane];
+}
+
+#pragma acc routine worker
+void
+worker (int *b)
+{
+  int row, col;
+
+  /* Treat b as N rows of M elements and add every element to itself.  */
+#pragma acc loop worker
+  for (row = 0; row < N; row++)
+    {
+#pragma acc loop vector
+      for (col = 0; col < M; col++)
+        b[row * M + col] = b[row * M + col] + b[row * M + col];
+    }
+}
+
+#pragma acc routine gang
+void
+gang (int *a)
+{
+  int pos;
+
+  /* Subtract its own index from each of the first N elements.  */
+#pragma acc loop gang worker vector
+  for (pos = 0; pos < N; pos++)
+    a[pos] = a[pos] - pos;
+}
+
+#pragma acc routine seq
+void
+seq (int *a)
+{
+  int pos;
+
+  /* Increment each of the first N elements by one.  */
+  for (pos = 0; pos < N; pos++)
+    a[pos] = a[pos] + 1;
+}
+
+/* Call each of the seq, gang, worker and vector routines from a parallel
+   region and verify on the host what they did to their argument array.  */
+int
+main(int argc, char **argv)
+{
+  int i;
+  int a[N];
+  int b[M * N];
+
+  for (i = 0; i < N; i++)
+    a[i] = 0;
+
+  /* N sequential calls, each adding 1 to every element of a.  */
+#pragma acc parallel copy (a[0:N])
+  {
+#pragma acc loop seq
+    for (i = 0; i < N; i++)
+      seq (&a[0]);
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != N)
+        abort ();
+    }
+
+  /* N sequential calls, each subtracting i from a[i].  */
+#pragma acc parallel copy (a[0:N])
+  {
+#pragma acc loop seq
+    for (i = 0; i < N; i++)
+      gang (&a[0]);
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != N + (N * (-1 * i)))
+        abort ();
+    }
+
+  /* Give b defined contents before it is copied to the device; worker reads
+     every element.  */
+  for (i = 0; i < M * N; i++)
+    b[i] = i;
+
+#pragma acc parallel copy (b[0:M*N])
+  {
+    worker (&b[0]);
+  }
+
+  /* worker adds every element of b to itself.  Like the checks above, this
+     assumes the routine call is executed once rather than redundantly by
+     several gangs.  */
+  for (i = 0; i < M * N; i++)
+    {
+      if (b[i] != i * 2)
+        abort ();
+    }
+
+  for (i = 0; i < N; i++)
+    a[i] = i;
+
+  /* vector subtracts each element from itself, so any number of calls
+     leaves zeros.  */
+#pragma acc parallel copy (a[0:N])
+  {
+#pragma acc loop
+    for (i = 0; i < N; i++)
+      vector (&a[0]);
+  }
+
+  for (i = 0; i < N; i++)
+    {
+      if (a[i] != 0)
+        abort ();
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
new file mode 100644
index 0000000..b5cbc90
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -0,0 +1,76 @@
+/* This code uses nvptx inline assembly guarded with acc_on_device, which is
+ not optimized away at -O0, and then confuses the target assembler.
+ { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+
+#include <stdio.h>
+#include <openacc.h>
+
+#define NUM_WORKERS 16
+#define NUM_VECTORS 32
+#define WIDTH 64
+#define HEIGHT 32
+
+/* Worker (WORK_ID) and vector (VEC_ID) index of the executing thread.  On an
+   nvidia device the value is read from the %tid.y / %tid.x register; anywhere
+   else it is computed as I modulo N.  The macro arguments are parenthesized
+   so that compound expressions can be passed safely.  */
+#define WORK_ID(I,N) \
+  (acc_on_device (acc_device_nvidia) \
+   ? ({unsigned __r; \
+       __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \
+       __r; }) : ((I) % (N)))
+#define VEC_ID(I,N) \
+  (acc_on_device (acc_device_nvidia) \
+   ? ({unsigned __r; \
+       __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \
+       __r; }) : ((I) % (N)))
+
+#pragma acc routine worker
+void __attribute__ ((noinline)) /* Fill a w-by-h array with worker/vector ids. */
+ WorkVec (int *ptr, int w, int h, int nw, int nv)
+{
+#pragma acc loop worker
+ for (int i = 0; i < h; i++) /* Rows are spread over workers ... */
+#pragma acc loop vector
+ for (int j = 0; j < w; j++) /* ... and columns over vectors. */
+ ptr[i*w + j] = (WORK_ID (i, nw) << 8) | VEC_ID(j, nv); /* Worker id shifted left by 8, OR'ed with the vector id. */
+}
+
+int DoWorkVec (int nw) /* Run WorkVec with nw workers; returns 1 on any mismatch, else 0. */
+{
+ int ary[HEIGHT][WIDTH];
+ int err = 0;
+
+ for (int ix = 0; ix != HEIGHT; ix++)
+ for (int jx = 0; jx != WIDTH; jx++)
+ ary[ix][jx] = 0xdeadbeef; /* Poison value: WorkVec must overwrite every element. */
+
+ printf ("spawning %d ...", nw); fflush (stdout);
+
+#pragma acc parallel num_workers(nw) vector_length (NUM_VECTORS) copy (ary)
+ {
+ WorkVec ((int *)ary, WIDTH, HEIGHT, nw, NUM_VECTORS);
+ }
+
+ for (int ix = 0; ix != HEIGHT; ix++)
+ for (int jx = 0; jx != WIDTH; jx++)
+ {
+ int exp = ((ix % nw) << 8) | (jx % NUM_VECTORS); /* Expected: row ix handled by worker ix % nw, column jx by vector jx % NUM_VECTORS. */
+
+ if (ary[ix][jx] != exp)
+ {
+ printf ("\nary[%d][%d] = %#x expected %#x", ix, jx,
+ ary[ix][jx], exp);
+ err = 1; /* Keep going so that every mismatch is reported. */
+ }
+ }
+ printf (err ? " failed\n" : " ok\n");
+
+ return err;
+}
+
+int main ()
+{
+  int failed = 0;
+  int workers = 1;
+
+  /* Try every power-of-two worker count from 1 up to NUM_WORKERS and
+     accumulate the failure flags.  */
+  while (workers <= NUM_WORKERS)
+    {
+      failed |= DoWorkVec (workers);
+      workers *= 2;
+    }
+
+  return failed;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
deleted file mode 100644
index 82c3192..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-1-2.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/* Copy of update-1.c with self exchanged with host for #pragma acc update. */
-
-/* { dg-do run } */
-/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */
-
-#include <openacc.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-
-int
-main (int argc, char **argv)
-{
- int N = 8;
- int NDIV2 = N / 2;
- float *a, *b, *c;
- float *d_a, *d_b, *d_c;
- int i;
-
- a = (float *) malloc (N * sizeof (float));
- b = (float *) malloc (N * sizeof (float));
- c = (float *) malloc (N * sizeof (float));
-
- d_a = (float *) acc_malloc (N * sizeof (float));
- d_b = (float *) acc_malloc (N * sizeof (float));
- d_c = (float *) acc_malloc (N * sizeof (float));
-
- for (i = 0; i < N; i++)
- {
- a[i] = 3.0;
- b[i] = 0.0;
- }
-
- acc_map_data (a, d_a, N * sizeof (float));
- acc_map_data (b, d_b, N * sizeof (float));
- acc_map_data (c, d_c, N * sizeof (float));
-
-#pragma acc update device (a[0:N], b[0:N])
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 3.0)
- abort ();
-
- if (b[i] != 3.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 1.0;
- }
-
-#pragma acc update device (a[0:N], b[0:N])
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 5.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- b[i] = 1.0;
- }
-
-#pragma acc update device (a[0:N], b[0:N])
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update host (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 5.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 6.0;
- b[i] = 0.0;
- }
-
-#pragma acc update device (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- a[i] = 9.0;
- }
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 6.0)
- abort ();
-
- if (b[i] != 6.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 7.0;
- b[i] = 2.0;
- }
-
-#pragma acc update device (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- a[i] = 9.0;
- }
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 7.0)
- abort ();
-
- if (b[i] != 7.0)
- abort ();
- }
-
- for (i = 0; i < N; i++)
- {
- a[i] = 9.0;
- }
-
-#pragma acc update device (a[0:N])
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < N; i++)
- {
- if (a[i] != 9.0)
- abort ();
-
- if (b[i] != 9.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 5.0;
- }
-
-#pragma acc update device (a[0:N])
-
- for (i = 0; i < N; i++)
- {
- a[i] = 6.0;
- }
-
-#pragma acc update device (a[0:NDIV2])
-
-#pragma acc parallel present (a[0:N], b[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- b[ii] = a[ii];
- }
-
-#pragma acc update self (a[0:N], b[0:N])
-
- for (i = 0; i < NDIV2; i++)
- {
- if (a[i] != 6.0)
- abort ();
-
- if (b[i] != 6.0)
- abort ();
- }
-
- for (i = NDIV2; i < N; i++)
- {
- if (a[i] != 5.0)
- abort ();
-
- if (b[i] != 5.0)
- abort ();
- }
-
- if (!acc_is_present (&a[0], (N * sizeof (float))))
- abort ();
-
- if (!acc_is_present (&b[0], (N * sizeof (float))))
- abort ();
-
- for (i = 0; i < N; i++)
- {
- a[i] = 0.0;
- }
-
-#pragma acc update device (a[0:4])
-
-#pragma acc parallel present (a[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- a[ii] = a[ii] + 1.0;
- }
-
-#pragma acc update self (a[4:4])
-
- for (i = 0; i < NDIV2; i++)
- {
- if (a[i] != 0.0)
- abort ();
- }
-
- for (i = NDIV2; i < N; i++)
- {
- if (a[i] != 6.0)
- abort ();
- }
-
-#pragma acc update self (a[0:4])
-
- for (i = 0; i < NDIV2; i++)
- {
- if (a[i] != 1.0)
- abort ();
- }
-
- for (i = NDIV2; i < N; i++)
- {
- if (a[i] != 6.0)
- abort ();
- }
-
- a[2] = 9;
- a[3] = 9;
- a[4] = 9;
- a[5] = 9;
-
-#pragma acc update device (a[2:4])
-
-#pragma acc parallel present (a[0:N])
- {
- int ii;
-
- for (ii = 0; ii < N; ii++)
- a[ii] = a[ii] + 1.0;
- }
-
-#pragma acc update self (a[2:4])
-
- for (i = 0; i < 2; i++)
- {
- if (a[i] != 1.0)
- abort ();
- }
-
- for (i = 2; i < 6; i++)
- {
- if (a[i] != 10.0)
- abort ();
- }
-
- for (i = 6; i < N; i++)
- {
- if (a[i] != 6.0)
- abort ();
- }
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-loop.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-loop.c
index 8a51ee3..807347f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-loop.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-loop.c
@@ -20,7 +20,7 @@ main (void)
#pragma acc parallel vector_length (32) copyin (a,b) copyout (c)
{
-#pragma acc loop /* vector clause is missing, since it's not yet supported. */
+#pragma acc loop vector
for (unsigned int i = 0; i < n; i++)
c[i] = a[i] + b[i];
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c
deleted file mode 100644
index 99c6dfb..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-1a.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <assert.h>
-
-/* Test worker-single/vector-single mode. */
-
-int
-main (int argc, char *argv[])
-{
- int arr[32], i;
-
- for (i = 0; i < 32; i++)
- arr[i] = 0;
-
- #pragma acc parallel copy(arr) num_gangs(8) num_workers(8) vector_length(32)
- {
- int j;
- #pragma acc loop gang
- for (j = 0; j < 32; j++)
- {
- #pragma acc atomic
- arr[j]++;
- }
- }
-
- for (i = 0; i < 32; i++)
- assert (arr[i] == 1);
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c
deleted file mode 100644
index 84080d0..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-4.c
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <assert.h>
-
-/* Test worker-single/vector-partitioned mode. */
-
-int
-main (int argc, char *argv[])
-{
- int arr[32], i;
-
- for (i = 0; i < 32; i++)
- arr[i] = i;
-
- #pragma acc parallel copy(arr) num_gangs(1) num_workers(8) vector_length(32)
- {
- int k;
- #pragma acc loop vector
- for (k = 0; k < 32; k++)
- {
- #pragma acc atomic
- arr[k]++;
- }
- }
-
- for (i = 0; i < 32; i++)
- assert (arr[i] == i + 1);
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c
deleted file mode 100644
index cbc3e37..0000000
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/worker-single-6.c
+++ /dev/null
@@ -1,46 +0,0 @@
-#include <assert.h>
-
-#if defined(ACC_DEVICE_TYPE_host)
-#define ACTUAL_GANGS 1
-#else
-#define ACTUAL_GANGS 8
-#endif
-
-/* Test worker-single, vector-partitioned, gang-redundant mode. */
-
-int
-main (int argc, char *argv[])
-{
- int n, arr[32], i;
-
- for (i = 0; i < 32; i++)
- arr[i] = 0;
-
- n = 0;
-
- #pragma acc parallel copy(n, arr) num_gangs(ACTUAL_GANGS) num_workers(8) \
- vector_length(32)
- {
- int j;
-
- #pragma acc atomic
- n++;
-
- #pragma acc loop vector
- for (j = 0; j < 32; j++)
- {
- #pragma acc atomic
- arr[j] += 1;
- }
-
- #pragma acc atomic
- n++;
- }
-
- assert (n == ACTUAL_GANGS * 2);
-
- for (i = 0; i < 32; i++)
- assert (arr[i] == ACTUAL_GANGS);
-
- return 0;
-}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
index b6e637b..01728bd 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-1.f90
@@ -132,4 +132,126 @@ program asyncwait
if (d(i) .ne. 1.0) call abort
if (e(i) .ne. 11.0) call abort
end do
+
+ a(:) = 3.0
+ b(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N))
+
+ !$acc kernels async
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+
+ a(:) = 2.0
+ b(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N))
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+
+ a(:) = 3.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+
+ !$acc data copy (a(1:N)) copy (b(1:N)) copy (c(1:N)) copy (d(1:N))
+
+ !$acc kernels async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ end do
+
+ a(:) = 2.0
+ b(:) = 0.0
+ c(:) = 0.0
+ d(:) = 0.0
+ e(:) = 0.0
+
+ !$acc data copy (a(1:N), b(1:N), c(1:N), d(1:N), e(1:N))
+
+ !$acc kernels async (1)
+ do i = 1, N
+ b(i) = (a(i) * a(i) * a(i)) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ c(i) = (a(i) * 4) / a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ d(i) = ((a(i) * a(i) + a(i)) / a(i)) - a(i)
+ end do
+ !$acc end kernels
+
+ !$acc kernels wait (1) async (1)
+ !$acc loop
+ do i = 1, N
+ e(i) = a(i) + b(i) + c(i) + d(i)
+ end do
+ !$acc end kernels
+
+ !$acc wait (1)
+ !$acc end data
+
+ do i = 1, N
+ if (a(i) .ne. 2.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ if (c(i) .ne. 4.0) call abort
+ if (d(i) .ne. 1.0) call abort
+ if (e(i) .ne. 11.0) call abort
+ end do
end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
index bade52b..fe131b6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-2.f90
@@ -1,6 +1,6 @@
! { dg-do run }
-program parallel_wait
+program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@@ -33,8 +33,33 @@ program parallel_wait
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
+
+ !$acc kernels async (0)
+ !$acc loop
+ do i = 1, N
+ a(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels wait (0, 1)
+ !$acc loop
+ do i = 1, N
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (c(i) .ne. 2.0) call abort
+ end do
deallocate (a)
deallocate (b)
deallocate (c)
-end program parallel_wait
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
index d48dc11..fa96a01 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/asyncwait-3.f90
@@ -1,6 +1,6 @@
! { dg-do run }
-program parallel_wait
+program asyncwait
integer, parameter :: N = 64
real, allocatable :: a(:), b(:), c(:)
integer i
@@ -35,8 +35,35 @@ program parallel_wait
do i = 1, N
if (c(i) .ne. 2.0) call abort
end do
+
+ !$acc kernels async (0)
+ !$acc loop
+ do i = 1, N
+ a(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc kernels async (1)
+ !$acc loop
+ do i = 1, N
+ b(i) = 1
+ end do
+ !$acc end kernels
+
+ !$acc wait (0, 1)
+
+ !$acc kernels
+ !$acc loop
+ do i = 1, N
+ c(i) = a(i) + b(i)
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (c(i) .ne. 2.0) call abort
+ end do
deallocate (a)
deallocate (b)
deallocate (c)
-end program parallel_wait
+end program asyncwait
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
new file mode 100644
index 0000000..e6ab78d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/clauses-1.f90
@@ -0,0 +1,290 @@
+! { dg-do run }
+! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } }
+
+program main
+ use openacc
+ implicit none
+ ! Checks data clauses on the parallel construct by value and via acc_is_present.
+ integer, parameter :: N = 32
+ real, allocatable :: a(:), b(:), c(:)
+ integer i
+
+ i = 0
+
+ allocate (a(N))
+ allocate (b(N))
+ allocate (c(N))
+
+ a(:) = 3.0
+ b(:) = 0.0
+ ! Case: copyin and copyout with neither array mapped beforehand.
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 3.0) call abort
+ end do
+ ! The region must not leave either array mapped.
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 1.0
+ ! Case: the same clauses again with new host values (b starts non-zero).
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 5.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ call acc_copyin (a, sizeof (a))
+
+ a(:) = 9.0
+ ! Case: a is already mapped holding 6.0; the host value 9.0 must not be used.
+ !$acc parallel present_or_copyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+ ! Case: present_or_copyout with b not mapped; b must come back as 6.0.
+ !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 2.0
+
+ call acc_copyin (b, sizeof (b))
+ ! Case: b is already mapped, so present_or_copyout must not copy it back.
+ !$acc parallel copyin (a(1:N)) present_or_copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+ ! Checked before acc_copyout: the host copy of b must still be 2.0.
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 2.0) call abort
+ end do
+
+ call acc_copyout (b, sizeof (b))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0;
+ b(:) = 4.0;
+ ! Case: copy (a) is updated in the region and must come back as 4.0.
+ !$acc parallel copy (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = a(i) + 2
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+ b(:) = 7.0
+ ! Case: present_or_copy with nothing mapped; both updates must reach the host.
+ !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = b(i) + 2
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 9.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0
+ b(:) = 7.0
+
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ ! Case: present_or_copy with both arrays already mapped by acc_copyin.
+ !$acc parallel present_or_copy (a(1:N)) present_or_copy (b(1:N))
+ do i = 1, N
+ a(i) = a(i) + 1
+ b(i) = b(i) + 2
+ end do
+ !$acc end parallel
+ ! Checked before acc_copyout: the host copies must be unchanged.
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 7.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 3.0
+ b(:) = 7.0
+ ! Case: create (c) is used as scratch inside the region: a -> c -> b.
+ !$acc parallel copyin (a(1:N)) create (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 3.0) call abort
+ if (b(i) .ne. 3.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+ b(:) = 8.0
+ ! Case: as above, with present_or_create for the scratch array.
+ !$acc parallel copyin (a(1:N)) present_or_create (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 4.0
+
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ call acc_copyin (c, sizeof (c))
+ ! Case: present clauses; all three arrays were mapped by acc_copyin above.
+ !$acc parallel present (a(1:N)) present (c(1:N)) present (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+ ! Results reach the host only through these explicit acc_copyout calls.
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+ call acc_copyout (c, sizeof (c))
+
+ do i = 1, N
+ if (a(i) .ne. 4.0) call abort
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+
+ call acc_copyin (a, sizeof (a))
+
+ a(:) = 9.0
+ ! Case: pcopyin spelling; a is already mapped with 6.0, host 9.0 must be ignored.
+ !$acc parallel pcopyin (a(1:N)) copyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ call acc_copyout (a, sizeof (a))
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 6.0
+ b(:) = 0.0
+ ! Case: pcopyout spelling with b not mapped; b must come back as 6.0.
+ !$acc parallel copyin (a(1:N)) pcopyout (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 6.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+
+ a(:) = 5.0
+ b(:) = 7.0
+ ! Case: pcreate spelling for the scratch array c.
+ !$acc parallel copyin (a(1:N)) pcreate (c(1:N)) copyout (b(1:N))
+ do i = 1, N
+ c(i) = a(i)
+ b(i) = c(i)
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (a(i) .ne. 5.0) call abort
+ if (b(i) .ne. 5.0) call abort
+ end do
+
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+ if (acc_is_present (c) .eqv. .TRUE.) call abort
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
index f717d1b..2d4b707 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
@@ -1,29 +1,22 @@
! { dg-do run { target openacc_nvidia_accel_selected } }
+! Tests to exercise the declare directive along with
+! the clauses: copy
+! copyin
+! copyout
+! create
+! present
+! present_or_copy
+! present_or_copyin
+! present_or_copyout
+! present_or_create
+
module vars
implicit none
integer z
!$acc declare create (z)
end module vars
-subroutine subr6 (a, d)
- implicit none
- integer, parameter :: N = 8
- integer :: i
- integer :: a(N)
- !$acc declare deviceptr (a)
- integer :: d(N)
-
- i = 0
-
- !$acc parallel copy (d)
- do i = 1, N
- d(i) = a(i) + a(i)
- end do
- !$acc end parallel
-
-end subroutine
-
subroutine subr5 (a, b, c, d)
implicit none
integer, parameter :: N = 8
@@ -201,15 +194,6 @@ subroutine subr0 (a, b, c, d)
if (d(i) .ne. 13) call abort
end do
- call subr6 (a, d)
-
- call test (a, .true.)
- call test (d, .false.)
-
- do i = 1, N
- if (d(i) .ne. 16) call abort
- end do
-
end subroutine
program main
@@ -241,8 +225,7 @@ program main
if (a(i) .ne. 8) call abort
if (b(i) .ne. 8) call abort
if (c(i) .ne. 8) call abort
- if (d(i) .ne. 16) call abort
+ if (d(i) .ne. 13) call abort
end do
-
end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/default-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/default-1.f90
new file mode 100644
index 0000000..1059089
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/default-1.f90
@@ -0,0 +1,54 @@
+! { dg-do run }
+
+program main
+ implicit none
+ real a, b
+ real c
+ !$acc declare create (c)
+ ! Every region below computes a = 1.0 + (old a), so a grows by one each time.
+ a = 2.0
+ b = 0.0
+ ! parallel: default (none) written after the data clauses.
+ !$acc parallel copy (a) create (b) default (none)
+ b = a
+ a = 1.0
+ a = a + b
+ !$acc end parallel
+
+ if (a .ne. 3.0) call abort
+ ! kernels: the same clauses on a kernels construct.
+ !$acc kernels copy (a) create (b) default (none)
+ b = a
+ a = 1.0
+ a = a + b
+ !$acc end kernels
+
+ if (a .ne. 4.0) call abort
+ ! parallel: default (none) written before the data clauses.
+ !$acc parallel default (none) copy (a) create (b)
+ b = a
+ a = 1.0
+ a = a + b
+ !$acc end parallel
+
+ if (a .ne. 5.0) call abort
+ ! c has no clause on the construct; it is named in the declare directive above.
+ !$acc parallel default (none) copy (a)
+ c = a
+ a = 1.0
+ a = a + c
+ !$acc end parallel
+
+ if (a .ne. 6.0) call abort
+ ! a is listed only on the enclosing data construct, c only on the declare.
+ !$acc data copy (a)
+ !$acc parallel default (none)
+ c = a
+ a = 1.0
+ a = a + c
+ !$acc end parallel
+ !$acc end data
+
+ if (a .ne. 7.0) call abort
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90
new file mode 100644
index 0000000..d3f9093
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/firstprivate-1.f90
@@ -0,0 +1,42 @@
+! { dg-do run }
+
+program firstprivate
+ integer, parameter :: Nupper=100
+ integer :: a, b(Nupper), c, d, n
+ include "openacc_lib.h"
+ ! n is both the gang count and the trip count: Nupper on Nvidia, otherwise 1.
+ if (acc_get_device_type () .eq. acc_device_nvidia) then
+ n = Nupper
+ else
+ n = 1
+ end if
+
+ b(:) = -1
+ a = 5
+ ! Each iteration adds i to the firstprivate a of the region and stores it in b(i).
+ !$acc parallel firstprivate (a) num_gangs (n)
+ !$acc loop gang
+ do i = 1, n
+ a = a + i
+ b(i) = a
+ end do
+ !$acc end parallel
+ ! Host a was never written back (still 5), so this expects b(i) == 5 + i.
+ do i = 1, n
+ if (b(i) .ne. i + a) call abort ()
+ end do
+ ! Inside data copy (a): the firstprivate write of 10 must not reach the mapped a.
+ !$acc data copy (a)
+ !$acc parallel firstprivate (a) copyout (c)
+ a = 10
+ c = a
+ !$acc end parallel
+ ! Reads the mapped a, which is expected to still hold 5 (checked through d).
+ !$acc parallel copyout (d) present (a)
+ d = a
+ !$acc end parallel
+ !$acc end data
+
+ if (c .ne. 10) call abort ()
+ if (d .ne. 5) call abort ()
+end program firstprivate
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
new file mode 100644
index 0000000..7d56060
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/gang-static-1.f90
@@ -0,0 +1,79 @@
+! { dg-do run }
+
+program main
+ integer, parameter :: n = 100
+ integer i, a(n), b(n)
+ integer x
+ ! b(i) = i is the reference; each loop below stores a(i) = b(i) + k.
+ do i = 1, n
+ b(i) = i
+ end do
+ ! One element-wise loop per gang (static:...) argument; test checks a == b + k.
+ !$acc parallel loop gang (static:*) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 0
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 0, n)
+
+ !$acc parallel loop gang (static:1) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 1
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 1, n)
+
+ !$acc parallel loop gang (static:2) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 2
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 2, n)
+
+ !$acc parallel loop gang (static:5) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 5
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 5, n)
+
+ !$acc parallel loop gang (static:20) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 20
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 20, n)
+ ! Static argument given as an expression, then as a plain variable.
+ x = 5
+ !$acc parallel loop gang (static:0+x) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 5
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 5, n)
+
+ x = 10
+ !$acc parallel loop gang (static:x) num_gangs (10)
+ do i = 1, n
+ a(i) = b(i) + 10
+ end do
+ !$acc end parallel loop
+
+ call test (a, b, 10, n)
+end program main
+
+subroutine test (a, b, sarg, n)
+ ! Host-side verification helper.
+ ! Aborts unless a(k) == b(k) + sarg holds for every k in 1..n.
+ integer :: n, sarg
+ integer :: a(n), b(n)
+
+ if (any (a(1:n) /= b(1:n) + sarg)) call abort ()
+
+end subroutine test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
new file mode 100644
index 0000000..44055e1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/if-1.f90
@@ -0,0 +1,886 @@
+! { dg-do run }
+! { dg-additional-options "-cpp" }
+
+program main
+ use openacc
+ implicit none
+
+ integer, parameter :: N = 8
+ integer, parameter :: one = 1
+ integer, parameter :: zero = 0
+ integer i, nn
+ real, allocatable :: a(:), b(:)
+ real exp, exp2
+
+ i = 0
+
+ allocate (a(N))
+ allocate (b(N))
+
+ a(:) = 4.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 5.0
+#else
+ exp = 4.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc parallel if (0 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 17.0) call abort
+ end do
+
+ a(:) = 8.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 9.0
+#else
+ exp = 8.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 22.0
+
+ !$acc parallel if (zero == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 23.0) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 76.0
+
+ !$acc parallel if (.FALSE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 77.0) call abort
+ end do
+
+ a(:) = 22.0
+
+ nn = 1
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 18.0
+
+ nn = 0
+
+ !$acc parallel if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 19.0) call abort
+ end do
+
+ a(:) = 49.0
+
+ nn = 1
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 50.0
+#else
+ exp = 49.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 38.0
+
+ nn = 0;
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 39.0) call abort
+ end do
+
+ a(:) = 91.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 92.0) call abort
+ end do
+
+ a(:) = 43.0
+
+ !$acc parallel copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+#if ACC_MEM_SHARED
+ exp = 44.0
+#else
+ exp = 43.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 87.0
+
+ !$acc parallel if (one == 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end parallel
+
+ do i = 1, N
+ if (b(i) .ne. 88.0) call abort
+ end do
+
+ a(:) = 3.0
+ b(:) = 9.0
+
+#if ACC_MEM_SHARED
+ exp = 0.0
+ exp2 = 0.0
+#else
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 6.0
+ b(:) = 12.0
+
+ !$acc update device (a(1:N), b(1:N)) if (0 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 26.0
+ b(:) = 21.0
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (0 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. 0.0) call abort
+ if (b(i) .ne. 0.0) call abort
+ end do
+
+#if !ACC_MEM_SHARED
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+#endif
+
+ a(:) = 4.0
+ b(:) = 0.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+
+ !$acc parallel present (a(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+ !$acc end data
+
+ do i = 1, N
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ a(:) = 8.0
+ b(:) = 1.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc end data
+
+ a(:) = 18.0
+ b(:) = 21.0
+ ! Three nested data regions; the middle one is switched off by its if clause.
+ !$acc data copyin (a(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc data copyout (b(1:N)) if (0 == 1)
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc data copyout (b(1:N)) if (1 == 1)
+
+ !$acc parallel present (a(1:N)) present (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc end data
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc end data
+ !$acc end data
+ ! Check every element of b against the value copied from a, not only b(1).
+ do i = 1, N
+ if (b(i) .ne. 18.0) call abort
+ end do
+
+ !$acc enter data copyin (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (0 == 1)
+
+ !$acc enter data copyin (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (zero == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (zero == 1)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (one == 0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 0)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ a(:) = 4.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 5.0
+#else
+ exp = 4.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc kernels if (0 == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 17.0) call abort
+ end do
+
+ a(:) = 8.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 9.0
+#else
+ exp = 8.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 22.0
+
+ !$acc kernels if (zero == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 23.0) call abort
+ end do
+
+ a(:) = 16.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (.TRUE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 17.0;
+#else
+ exp = 16.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 76.0
+
+ !$acc kernels if (.FALSE.)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 77.0) call abort
+ end do
+
+ a(:) = 22.0
+
+ nn = 1
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 23.0;
+#else
+ exp = 22.0;
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 18.0
+
+ nn = 0
+
+ !$acc kernels if (nn == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 19.0) call abort
+ end do
+
+ a(:) = 49.0
+
+ nn = 1
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 50.0
+#else
+ exp = 49.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 38.0
+
+ nn = 0;
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 39.0) call abort
+ end do
+
+ a(:) = 91.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 92.0) call abort
+ end do
+
+ a(:) = 43.0
+
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (one == 1)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+#if ACC_MEM_SHARED
+ exp = 44.0
+#else
+ exp = 43.0
+#endif
+
+ do i = 1, N
+ if (b(i) .ne. exp) call abort
+ end do
+
+ a(:) = 87.0
+
+ !$acc kernels if (one == 0)
+ do i = 1, N
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) then
+ b(i) = a(i) + 1
+ else
+ b(i) = a(i)
+ end if
+ end do
+ !$acc end kernels
+
+ do i = 1, N
+ if (b(i) .ne. 88.0) call abort
+ end do
+
+ a(:) = 3.0
+ b(:) = 9.0
+
+#if ACC_MEM_SHARED
+ exp = 0.0
+ exp2 = 0.0
+#else
+ call acc_copyin (a, sizeof (a))
+ call acc_copyin (b, sizeof (b))
+ exp = 3.0;
+ exp2 = 9.0;
+#endif
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 6.0
+ b(:) = 12.0
+
+ !$acc update device (a(1:N), b(1:N)) if (0 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (1 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. exp) call abort
+ if (b(i) .ne. exp2) call abort
+ end do
+
+ a(:) = 26.0
+ b(:) = 21.0
+
+ !$acc update device (a(1:N), b(1:N)) if (1 == 1)
+
+ a(:) = 0.0
+ b(:) = 0.0
+
+ !$acc update host (a(1:N), b(1:N)) if (0 == 1)
+
+ do i = 1, N
+ if (a(i) .ne. 0.0) call abort
+ if (b(i) .ne. 0.0) call abort
+ end do
+
+#if !ACC_MEM_SHARED
+ call acc_copyout (a, sizeof (a))
+ call acc_copyout (b, sizeof (b))
+#endif
+
+ a(:) = 4.0
+ b(:) = 0.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (1 == 1)
+
+ !$acc kernels present (a(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+ !$acc end data
+
+ do i = 1, N
+ if (b(i) .ne. 4.0) call abort
+ end do
+
+ a(:) = 8.0
+ b(:) = 1.0
+
+ !$acc data copyin (a(1:N)) copyout (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .TRUE.) call abort
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc end data
+
+ a(:) = 18.0
+ b(:) = 21.0
+ ! Three nested data regions; the middle one is switched off by its if clause.
+ !$acc data copyin (a(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (a) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc data copyout (b(1:N)) if (0 == 1)
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc data copyout (b(1:N)) if (1 == 1)
+
+ !$acc kernels present (a(1:N)) present (b(1:N))
+ do i = 1, N
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+ !$acc end data
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+ !$acc end data
+ !$acc end data
+ ! Check every element of b against the value copied from a, not only b(1).
+ do i = 1, N
+ if (b(i) .ne. 18.0) call abort
+ end do
+
+ !$acc enter data copyin (b(1:N)) if (0 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (0 == 1)
+
+ !$acc enter data copyin (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (1 == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (zero == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (zero == 1)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc enter data copyin (b(1:N)) if (one == 0)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 0)
+
+ !$acc enter data copyin (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .FALSE.) call abort
+#endif
+
+ !$acc exit data delete (b(1:N)) if (one == 1)
+
+#if !ACC_MEM_SHARED
+ if (acc_is_present (b) .eqv. .TRUE.) call abort
+#endif
+
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90 b/libgomp/testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90
new file mode 100644
index 0000000..a5f3840
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/implicit-firstprivate-ref.f90
@@ -0,0 +1,42 @@
+! This test checks if the runtime can properly handle implicit
+! firstprivate variables inside subroutines in modules.
+
+! { dg-do run }
+
+module test_mod
+ contains
+ subroutine test(x)
+ ! Overwrites x: first with 5, finally with the -1 copied out of the last loop.
+ IMPLICIT NONE
+
+ INTEGER :: x, y, j
+
+ x = 5
+ ! x is only read here and has no data clause (presumably the implicit case under test).
+ !$ACC PARALLEL LOOP copyout (y)
+ DO j=1,10
+ y=x
+ ENDDO
+ !$ACC END PARALLEL LOOP
+
+ y = -1;
+ ! Explicit firstprivate (y); x receives y inside the region and is copied out.
+ !$ACC PARALLEL LOOP firstprivate (y) copyout (x)
+ DO j=1,10
+ x=y
+ ENDDO
+ !$ACC END PARALLEL LOOP
+ end subroutine test
+end module test_mod
+
+program t
+ use test_mod
+
+ INTEGER :: x_min
+ ! The initial 8 is overwritten inside test.
+ x_min = 8
+
+ CALL test(x_min)
+ ! test assigns -1 to y before its last loop copies y into the argument.
+ if (x_min .ne. -1) call abort
+end program t
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90
new file mode 100644
index 0000000..735350f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr68813.f90
@@ -0,0 +1,19 @@
+program foo
+ implicit none
+ integer, parameter :: n = 100
+ integer, dimension(n,n) :: a
+ integer :: i, j, sum = 0
+ ! Named after PR68813 (per the file name); the program contains no result check.
+ a = 1
+ ! sum: firstprivate on parallel, reduction (+) on both the gang and vector loops.
+ !$acc parallel copyin(a(1:n,1:n)) firstprivate (sum)
+ !$acc loop gang reduction(+:sum)
+ do i=1, n
+ !$acc loop vector reduction(+:sum)
+ do j=1, n
+ sum = sum + a(i, j)
+ enddo
+ enddo
+ !$acc end parallel
+ ! sum is not inspected after the region.
+end program foo
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90
new file mode 100644
index 0000000..3c1940b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-variables.f90
@@ -0,0 +1,544 @@
+! Miscellaneous tests for private variables.
+
+! { dg-do run }
+
+
+! Test of gang-private variables declared on loop directive.
+
+subroutine t1()
+ integer :: x, i, arr(32)
+ ! Initialize arr(i) = i for i = 1..32.
+ do i = 1, 32
+ arr(i) = i
+ end do
+ ! Each iteration adds its private x = 2*i to arr(i), so arr(i) should end up as 3*i.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang private(x)
+ do i = 1, 32
+ x = i * 2;
+ arr(i) = arr(i) + x
+ end do
+ !$acc end parallel
+ ! Verify on the host; abort on the first mismatch.
+ do i = 1, 32
+ if (arr(i) .ne. i * 3) call abort
+ end do
+end subroutine t1
+
+
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned workers.
+
+subroutine t2()
+ integer :: x, i, j, arr(0:32*32)
+ ! Initialize elements 0..1023; the declared upper element 1024 is never set or checked.
+ do i = 0, 32*32-1
+ arr(i) = i
+ end do
+ ! x = 2*i is set once per outer (gang) iteration and read by every inner worker iteration.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang private(x)
+ do i = 0, 31
+ x = i * 2;
+
+ !$acc loop worker
+ do j = 0, 31
+ arr(i * 32 + j) = arr(i * 32 + j) + x
+ end do
+ end do
+ !$acc end parallel
+ ! i / 32 recovers the outer loop index from the flat index.
+ do i = 0, 32 * 32 - 1
+ if (arr(i) .ne. i + (i / 32) * 2) call abort
+ end do
+end subroutine t2
+
+
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned vectors.
+
+subroutine t3()
+ integer :: x, i, j, arr(0:32*32)
+ ! Initialize elements 0..1023; the declared upper element 1024 is never set or checked.
+ do i = 0, 32*32-1
+ arr(i) = i
+ end do
+ ! x = 2*i is set once per outer (gang) iteration and read by every inner vector iteration.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang private(x)
+ do i = 0, 31
+ x = i * 2;
+
+ !$acc loop vector
+ do j = 0, 31
+ arr(i * 32 + j) = arr(i * 32 + j) + x
+ end do
+ end do
+ !$acc end parallel
+ ! i / 32 recovers the outer loop index from the flat index.
+ do i = 0, 32 * 32 - 1
+ if (arr(i) .ne. i + (i / 32) * 2) call abort
+ end do
+end subroutine t3
+
+
+! Test of gang-private addressable variable declared on loop directive, with
+! broadcasting to partitioned workers.
+
+subroutine t4()
+ type vec3
+ integer x, y, z, attr(13)
+ end type vec3
+ ! pt is the derived-type variable privatized on the gang loop below.
+ integer i, j, arr(0:32*32)
+ type(vec3) pt
+ ! Initialize elements 0..1023; the declared upper element 1024 is never set or checked.
+ do i = 0, 32*32-1
+ arr(i) = i
+ end do
+ ! Components set per outer iteration sum to i + 2*i + 4*i + 6*i = 13*i.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang private(pt)
+ do i = 0, 31
+ pt%x = i
+ pt%y = i * 2
+ pt%z = i * 4
+ pt%attr(5) = i * 6
+ ! NOTE(review): the comment above this subroutine says "workers", but this inner loop is vector.
+ !$acc loop vector
+ do j = 0, 31
+ arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5);
+ end do
+ end do
+ !$acc end parallel
+ ! i / 32 recovers the outer loop index from the flat index.
+ do i = 0, 32 * 32 - 1
+ if (arr(i) .ne. i + (i / 32) * 13) call abort
+ end do
+end subroutine t4
+
+
+! Test of vector-private variables declared on loop directive.
+
+subroutine t5()
+ integer :: x, i, j, k, idx, arr(0:32*32*32)
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! Two consecutive vector loops each privatize x and add x * k to the same element.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker
+ do j = 0, 31
+ !$acc loop vector private(x)
+ do k = 0, 31
+ x = ieor(i, j * 3)
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ !$acc loop vector private(x)
+ do k = 0, 31
+ x = ior(i, j * 5)
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t5
+
+
+! Test of vector-private variables declared on loop directive. Array type.
+
+subroutine t6()
+ integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+ ! NOTE(review): x in the private clause below is not declared here and is never referenced.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! The two-element array pt is privatized on the vector loop; both elements feed the same arr entry.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker
+ do j = 0, 31
+ !$acc loop vector private(x, pt)
+ do k = 0, 31
+ pt(1) = ieor(i, j * 3)
+ pt(2) = ior(i, j * 5)
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t6
+
+
+! Test of worker-private variables declared on a loop directive.
+
+subroutine t7()
+ integer :: x, i, j, arr(0:32*32)
+ common x ! x lives in blank common and is privatized on both loops below
+ ! Initialize elements 0..1023; the declared upper element 1024 is never set or checked.
+ do i = 0, 32*32-1
+ arr(i) = i
+ end do
+ ! x is listed as private on the gang loop and again on the nested worker loop.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang private(x)
+ do i = 0, 31
+ !$acc loop worker private(x)
+ do j = 0, 31
+ x = ieor(i, j * 3)
+ arr(i * 32 + j) = arr(i * 32 + j) + x
+ end do
+ end do
+ !$acc end parallel
+ ! i / 32 and mod(i, 32) recover the outer and inner loop indices from the flat index.
+ do i = 0, 32 * 32 - 1
+ if (arr(i) .ne. i + ieor(i / 32, mod(i, 32) * 3)) call abort
+ end do
+end subroutine t7
+
+
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.
+
+subroutine t8()
+ integer :: x, i, j, k, idx, arr(0:32*32*32)
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! x is private on the worker loop and is read by every iteration of the inner vector loop.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(x)
+ do j = 0, 31
+ x = ieor(i, j * 3)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k) call abort
+ end do
+ end do
+ end do
+end subroutine t8
+
+
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Back-to-back worker loops.
+
+subroutine t9()
+ integer :: x, i, j, k, idx, arr(0:32*32*32)
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! Two worker loops run back to back inside each gang iteration, each with its own private x.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(x)
+ do j = 0, 31
+ x = ieor(i, j * 3)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ ! Second worker loop: same index space, adds the ior-based term.
+ !$acc loop worker private(x)
+ do j = 0, 31
+ x = ior(i, j * 5)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t9
+
+
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Successive vector loops.
+
+subroutine t10()
+ integer :: x, i, j, k, idx, arr(0:32*32*32)
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! One worker loop holds two vector loops; x is reassigned between them.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(x)
+ do j = 0, 31
+ x = ieor(i, j * 3)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ ! The second vector loop must observe this new value of x.
+ x = ior(i, j * 5)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t10
+
+
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Addressable worker variable.
+
+subroutine t11()
+ integer :: i, j, k, idx, arr(0:32*32*32)
+ integer, target :: x
+ integer, pointer :: p
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! Both the target x and the pointer p are private on the worker loop.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(x, p)
+ do j = 0, 31
+ p => x
+ x = ieor(i, j * 3)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ ! Assigning through p updates x, which the second vector loop then reads.
+ p = ior(i, j * 5)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t11
+
+
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode. Aggregate worker variable.
+
+subroutine t12()
+ type vec2
+ integer x, y
+ end type vec2
+ ! pt is the derived-type variable privatized on the worker loop below.
+ integer :: i, j, k, idx, arr(0:32*32*32)
+ type(vec2) :: pt
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! Both components are set once per worker iteration; each vector loop consumes one of them.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(pt)
+ do j = 0, 31
+ pt%x = ieor(i, j * 3)
+ pt%y = ior(i, j * 5)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
+ end do
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t12
+
+
+! Test of worker-private variables declared on loop directive, broadcasting
+! to vector-partitioned mode. Array worker variable.
+
+subroutine t13()
+ integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+ ! Initialize elements 0..32767; the declared upper element 32768 is never set or checked.
+ do i = 0, 32*32*32-1
+ arr(i) = i
+ end do
+ ! The two-element array pt is private on the worker loop; each vector loop reads one element.
+ !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+ !$acc loop gang
+ do i = 0, 31
+ !$acc loop worker private(pt)
+ do j = 0, 31
+ pt(1) = ieor(i, j * 3)
+ pt(2) = ior(i, j * 5)
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+ end do
+
+ !$acc loop vector
+ do k = 0, 31
+ arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+ end do
+ end do
+ end do
+ !$acc end parallel
+ ! Recompute the expected value sequentially on the host; abort on any mismatch.
+ do i = 0, 32 - 1
+ do j = 0, 32 -1
+ do k = 0, 32 - 1
+ idx = i * 1024 + j * 32 + k
+ if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+ call abort
+ end if
+ end do
+ end do
+ end do
+end subroutine t13
+
+
+! Test of gang-private variables declared on the parallel directive.
+
+subroutine t14()
+ use openacc
+ integer :: x = 5
+ integer, parameter :: n = 32
+ integer :: arr(n)
+ ! The loop index i is not declared in this subroutine (no implicit none is in effect here).
+ do i = 1, n
+ arr(i) = 3
+ end do
+ ! x is private on the parallel construct; the first loop writes it and the second loop reads it.
+ !$acc parallel private(x) copy(arr) num_gangs(n) num_workers(8) vector_length(32)
+ !$acc loop gang(static:1)
+ do i = 1, n
+ x = i * 2;
+ end do
+ ! NOTE(review): presumably relies on both static:1 loops giving a gang the same iteration - confirm.
+ !$acc loop gang(static:1)
+ do i = 1, n
+ if (acc_on_device (acc_device_host) .eqv. .TRUE.) x = i * 2
+ arr(i) = arr(i) + x
+ end do
+ !$acc end parallel
+ ! Every element started at 3 and should have gained 2*i.
+ do i = 1, n
+ if (arr(i) .ne. (3 + i * 2)) call abort
+ end do
+
+end subroutine t14
+
+
+program main ! runs t1 through t14 in order; each test calls abort on a mismatch
+ call t1()
+ call t2()
+ call t3()
+ call t4()
+ call t5()
+ call t6()
+ call t7()
+ call t8()
+ call t9()
+ call t10()
+ call t11()
+ call t12()
+ call t13()
+ call t14()
+end program main