about | summary | refs | log | tree | commit | diff
path: root/libgomp/testsuite
diff options
context:
space:
mode:
author: Cesar Philippidis <cesar@codesourcery.com> 2016-04-08 14:09:47 -0700
committer: Cesar Philippidis <cesar@gcc.gnu.org> 2016-04-08 14:09:47 -0700
commit: c42cfb5ca3b02756705485e013fa9107aaf28acd (patch)
tree: 354e1cd54c889228c92a0a83560a5bf53a7aab00 /libgomp/testsuite
parent: 51a4b0f18711b23f2f696a4f546ccce5b1653cf5 (diff)
download: gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.zip
gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.tar.gz
gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.tar.bz2
re PR lto/70289 ([openacc] ICE in input_varpool_node)
gcc/
    PR lto/70289
    PR ipa/70348
    PR tree-optimization/70373
    PR middle-end/70533
    PR middle-end/70534
    PR middle-end/70535
    * gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data clauses for acc parallel reductions as necessary. Error on those that are private.
    * omp-low.c (scan_sharing_clauses): Don't install variables which are used in acc parallel reductions.
    (lower_rec_input_clauses): Remove dead code.
    (lower_oacc_reductions): Add support for reference reductions.
    (lower_reduction_clauses): Remove dead code.
    (lower_omp_target): Don't remap variables appearing in acc parallel reductions.
    * tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.

gcc/testsuite/
    * c-c++-common/goacc/reduction-5.c: New test.
    * c-c++-common/goacc/reduction-promotions.c: New test.
    * gfortran.dg/goacc/reduction-3.f95: New test.
    * gfortran.dg/goacc/reduction-promotions.f90: New test.

libgomp/
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test coverage.
    * testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test coverage.
    * testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
    * testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
    * testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
    * testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
    * testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
    * testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage.
    * testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
    * testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.

From-SVN: r234840
Diffstat (limited to 'libgomp/testsuite')
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c 45
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c 28
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c 34
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c 33
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c 55
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c 41
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c 41
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c 45
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c 38
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c 38
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c 40
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c 55
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c 22
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c 20
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c 8
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c 72
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c 13
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c 20
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c 145
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c 163
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c 133
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c 85
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c 57
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c 36
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h 43
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 47
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90 20
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 470
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 344
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 342
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 108
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 86
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 86
-rw-r--r-- libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 88
39 files changed, 2559 insertions, 530 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
new file mode 100644
index 0000000..55ab3c9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c
@@ -0,0 +1,45 @@
+/* { dg-additional-options "-w" } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, non-private reduction
+ variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, arr[1024], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[i];
+ }
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ res = hres = 1;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang reduction(*:res)
+ for (i = 0; i < 12; i++)
+ res *= arr[i];
+ }
+
+ for (i = 0; i < 12; i++)
+ hres *= arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
new file mode 100644
index 0000000..d4341e9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-w" } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs and vectors, non-private
+ reduction variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, arr[1024], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[i];
+ }
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
new file mode 100644
index 0000000..2e5668b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c
@@ -0,0 +1,30 @@
+/* { dg-additional-options "-w" } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs and workers, non-private
+ reduction variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, arr[1024], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang worker reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[i];
+ }
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
new file mode 100644
index 0000000..d610373
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c
@@ -0,0 +1,28 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+ reduction variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, arr[1024], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[i];
+ }
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
new file mode 100644
index 0000000..ea5c151
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c
@@ -0,0 +1,34 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+ reduction variable: separate gang and worker/vector loops). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[32768], res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res)
+ {
+ #pragma acc loop gang reduction(+:res)
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + i];
+ }
+ /* "res" is non-private, and is not available until after the parallel
+ region. */
+ }
+
+ for (i = 0; i < 32768; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
new file mode 100644
index 0000000..0056f3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c
@@ -0,0 +1,33 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, non-private
+ reduction variable: separate gang and worker/vector loops, double type). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j;
+ double arr[32768], res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copyin(arr) copy(res)
+ {
+ #pragma acc loop gang reduction(+:res)
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + i];
+ }
+ }
+
+ for (i = 0; i < 32768; i++)
+ hres += arr[i];
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
new file mode 100644
index 0000000..e69d0ec
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c
@@ -0,0 +1,55 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (gangs, workers and vectors, multiple
+ non-private reduction variables, float type). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j;
+ float arr[32768];
+ float res = 0, mres = 0, hres = 0, hmres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(res, mres)
+ {
+ #pragma acc loop gang reduction(+:res) reduction(max:mres)
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+ for (i = 0; i < 1024; i++)
+ {
+ res += arr[j * 1024 + i];
+ if (arr[j * 1024 + i] > mres)
+ mres = arr[j * 1024 + i];
+ }
+
+ #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+ for (i = 0; i < 1024; i++)
+ {
+ res += arr[j * 1024 + (1023 - i)];
+ if (arr[j * 1024 + (1023 - i)] > mres)
+ mres = arr[j * 1024 + (1023 - i)];
+ }
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 1024; i++)
+ {
+ hres += arr[j * 1024 + i];
+ hres += arr[j * 1024 + (1023 - i)];
+ if (arr[j * 1024 + i] > hmres)
+ hmres = arr[j * 1024 + i];
+ if (arr[j * 1024 + (1023 - i)] > hmres)
+ hmres = arr[j * 1024 + (1023 - i)];
+ }
+
+ assert (res == hres);
+ assert (mres == hmres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
new file mode 100644
index 0000000..31e4366
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c
@@ -0,0 +1,43 @@
+/* { dg-additional-options "-w" } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (vectors, private reduction
+ variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(res) copyout(out)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ res = 0;
+
+ #pragma acc loop vector reduction(+:res)
+ for (i = 0; i < 32; i++)
+ res += arr[j * 32 + i];
+
+ out[j] = res;
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ {
+ hres = 0;
+
+ for (i = 0; i < 32; i++)
+ hres += arr[j * 32 + i];
+
+ assert (out[j] == hres);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
new file mode 100644
index 0000000..15f0053
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c
@@ -0,0 +1,41 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (vector reduction in
+ gang-partitioned/worker-partitioned mode, private reduction variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, k;
+ double ina[1024], inb[1024], out[1024], acc;
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 32; i++)
+ {
+ ina[j * 32 + i] = (i == j) ? 2.0 : 0.0;
+ inb[j * 32 + i] = (double) (i + j);
+ }
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(acc) copyin(ina, inb) copyout(out)
+ {
+ #pragma acc loop gang worker
+ for (k = 0; k < 32; k++)
+ for (j = 0; j < 32; j++)
+ {
+ acc = 0;
+
+ #pragma acc loop vector reduction(+:acc)
+ for (i = 0; i < 32; i++)
+ acc += ina[k * 32 + i] * inb[i * 32 + j];
+
+ out[k * 32 + j] = acc;
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 32; i++)
+ assert (out[j * 32 + i] == (i + j) * 2);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
new file mode 100644
index 0000000..4a92503
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c
@@ -0,0 +1,43 @@
+/* { dg-additional-options "-w" } */
+
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers, private reduction
+ variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(res) copyout(out)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ res = 0;
+
+ #pragma acc loop worker reduction(+:res)
+ for (i = 0; i < 32; i++)
+ res += arr[j * 32 + i];
+
+ out[j] = res;
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ {
+ hres = 0;
+
+ for (i = 0; i < 32; i++)
+ hres += arr[j * 32 + i];
+
+ assert (out[j] == hres);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
new file mode 100644
index 0000000..1bfb284
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c
@@ -0,0 +1,41 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+ variable). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[1024], out[32], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(res) copyout(out)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ res = 0;
+
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 32; i++)
+ res += arr[j * 32 + i];
+
+ out[j] = res;
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ {
+ hres = 0;
+
+ for (i = 0; i < 32; i++)
+ hres += arr[j * 32 + i];
+
+ assert (out[j] == hres);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
new file mode 100644
index 0000000..93ab78f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c
@@ -0,0 +1,45 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+ variable: separate worker and vector loops). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[32768], out[32], res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(res) copyout(out)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ res = j;
+
+ #pragma acc loop worker reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + i];
+
+ #pragma acc loop vector reduction(+:res)
+ for (i = 1023; i >= 0; i--)
+ res += arr[j * 1024 + i];
+
+ out[j] = res;
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ {
+ hres = j;
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[j * 1024 + i] * 2;
+
+ assert (out[j] == hres);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
new file mode 100644
index 0000000..298e25c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+/* Test of reduction on loop directive (workers and vectors, private reduction
+ variable: gang-redundant mode). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, arr[1024], out[32], res = 0, hres = 0;
+
+ for (i = 0; i < 1024; i++)
+ arr[i] = i ^ 33;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ private(res) copyin(arr) copyout(out)
+ {
+ /* Private variables aren't initialized by default in openacc. */
+ res = 0;
+
+ /* "res" should be available at the end of the following loop (and should
+ have the same value redundantly in each gang). */
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[i];
+
+ #pragma acc loop gang (static: 1)
+ for (i = 0; i < 32; i++)
+ out[i] = res;
+ }
+
+ for (i = 0; i < 1024; i++)
+ hres += arr[i];
+
+ for (i = 0; i < 32; i++)
+ assert (out[i] == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
new file mode 100644
index 0000000..5e82e1d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c
@@ -0,0 +1,38 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (worker and
+ vector-partitioned loops individually in gang-partitioned mode, int
+ type). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[32768], res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ reduction(+:res) copy(res)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + i];
+
+ #pragma acc loop vector reduction(+:res)
+ for (i = 1023; i >= 0; i--)
+ res += arr[j * 1024 + i];
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 1024; i++)
+ hres += arr[j * 1024 + i] * 2;
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
new file mode 100644
index 0000000..a7a75a9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+ in gang-partitioned mode, int type with XOR). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j, arr[32768], res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ reduction(^:res)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(^:res)
+ for (i = 0; i < 1024; i++)
+ res ^= arr[j * 1024 + i];
+
+ #pragma acc loop worker vector reduction(^:res)
+ for (i = 0; i < 1024; i++)
+ res ^= arr[j * 1024 + (1023 - i)];
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 1024; i++)
+ {
+ hres ^= arr[j * 1024 + i];
+ hres ^= arr[j * 1024 + (1023 - i)];
+ }
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
new file mode 100644
index 0000000..8d85fed
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c
@@ -0,0 +1,42 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+ together in gang-partitioned mode, float type). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j;
+ float arr[32768];
+ float res = 0, hres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ reduction(+:res) copy(res)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + i];
+
+ #pragma acc loop worker vector reduction(+:res)
+ for (i = 0; i < 1024; i++)
+ res += arr[j * 1024 + (1023 - i)];
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 1024; i++)
+ {
+ hres += arr[j * 1024 + i];
+ hres += arr[j * 1024 + (1023 - i)];
+ }
+
+ assert (res == hres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
new file mode 100644
index 0000000..1904b4a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c
@@ -0,0 +1,55 @@
+#include <assert.h>
+
+/* Test of reduction on both parallel and loop directives (workers and vectors
+ together in gang-partitioned mode, float type, multiple reductions). */
+
+int
+main (int argc, char *argv[])
+{
+ int i, j;
+ float arr[32768];
+ float res = 0, mres = 0, hres = 0, hmres = 0;
+
+ for (i = 0; i < 32768; i++)
+ arr[i] = i;
+
+ #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ reduction(+:res) reduction(max:mres) copy(res, mres)
+ {
+ #pragma acc loop gang
+ for (j = 0; j < 32; j++)
+ {
+ #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+ for (i = 0; i < 1024; i++)
+ {
+ res += arr[j * 1024 + i];
+ if (arr[j * 1024 + i] > mres)
+ mres = arr[j * 1024 + i];
+ }
+
+ #pragma acc loop worker vector reduction(+:res) reduction(max:mres)
+ for (i = 0; i < 1024; i++)
+ {
+ res += arr[j * 1024 + (1023 - i)];
+ if (arr[j * 1024 + (1023 - i)] > mres)
+ mres = arr[j * 1024 + (1023 - i)];
+ }
+ }
+ }
+
+ for (j = 0; j < 32; j++)
+ for (i = 0; i < 1024; i++)
+ {
+ hres += arr[j * 1024 + i];
+ hres += arr[j * 1024 + (1023 - i)];
+ if (arr[j * 1024 + i] > hmres)
+ hmres = arr[j * 1024 + i];
+ if (arr[j * 1024 + (1023 - i)] > hmres)
+ hmres = arr[j * 1024 + (1023 - i)];
+ }
+
+ assert (res == hres);
+ assert (mres == hmres);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
index dceac39..a88b60f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c
@@ -1,40 +1,54 @@
+/* { dg-additional-options "-w" } */
+
#include <assert.h>
+/* Test of reduction on parallel directive. */
+
+#define ACTUAL_GANGS 256
+
int
main (int argc, char *argv[])
{
- int res, res2 = 0;
+ int res, res1 = 0, res2 = 0;
#if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1
#else
# define GANGS 256
#endif
- #pragma acc parallel num_gangs(GANGS) copy(res2)
+ #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
+ reduction(+:res1) copy(res2, res1)
{
+ res1 += 5;
+
#pragma acc atomic
res2 += 5;
}
res = GANGS * 5;
+ assert (res == res1);
assert (res == res2);
#undef GANGS
- res = res2 = 1;
+ res = res1 = res2 = 1;
#if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1
#else
# define GANGS 8
#endif
- #pragma acc parallel num_gangs(GANGS) copy(res2)
+ #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
+ reduction(*:res1) copy(res1, res2)
{
+ res1 *= 5;
+
#pragma acc atomic
res2 *= 5;
}
for (int i = 0; i < GANGS; ++i)
res *= 5;
+ assert (res == res1);
assert (res == res2);
#undef GANGS
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
index bd5715c..911b76c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c
@@ -1,18 +1,25 @@
+/* { dg-additional-options "-w" } */
+
#include <assert.h>
#include <openacc.h>
+/* Test of reduction on parallel directive (with async). */
+
int
main (int argc, char *argv[])
{
- int res, res2 = 0;
+ int res, res1 = 0, res2 = 0;
#if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1
#else
# define GANGS 256
#endif
- #pragma acc parallel num_gangs(GANGS) copy(res2) async(1)
+ #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
+ reduction(+:res1) copy(res1, res2) async(1)
{
+ res1 += 5;
+
#pragma acc atomic
res2 += 5;
}
@@ -20,18 +27,22 @@ main (int argc, char *argv[])
acc_wait (1);
+ assert (res == res1);
assert (res == res2);
#undef GANGS
- res = res2 = 1;
+ res = res1 = res2 = 1;
#if defined(ACC_DEVICE_TYPE_host)
# define GANGS 1
#else
# define GANGS 8
#endif
- #pragma acc parallel num_gangs(GANGS) copy(res2) async(1)
+ #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \
+ reduction(*:res1) copy(res1, res2) async(1)
{
+ res1 *= 5;
+
#pragma acc atomic
res2 *= 5;
}
@@ -40,6 +51,7 @@ main (int argc, char *argv[])
acc_wait (1);
+ assert (res == res1);
assert (res == res2);
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 39357ce..f5766a4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -5,12 +5,20 @@
int main ()
{
+ int dummy[10];
+
#pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */
{
+#pragma acc loop worker
+ for (int i = 0; i < 10; i++)
+ dummy[i] = i;
}
#pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */
{
+#pragma acc loop vector
+ for (int i = 0; i < 10; i++)
+ dummy[i] = i;
}
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c
new file mode 100644
index 0000000..b2c60e5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-additional-options "-w" } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+#define N 10
+
+int
+main ()
+{
+ int s1 = 0, s2 = 0;
+ int i;
+ int dummy = 0;
+
+#pragma acc data copy (dummy)
+ {
+#pragma acc parallel num_gangs (N) reduction (+:s1) copy(s1)
+ {
+ s1++;
+ }
+ }
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ {
+ if (s1 != 1)
+ abort ();
+ }
+ else
+ {
+ if (s1 != N)
+ abort ();
+ }
+
+ s1 = 0;
+ s2 = 0;
+
+#pragma acc parallel num_gangs (10) reduction (+:s1, s2) copy(s1, s2)
+ {
+ s1++;
+ s2 += N;
+ }
+
+ if (acc_get_device_type () != acc_device_nvidia)
+ {
+ if (s1 != 1)
+ abort ();
+ if (s2 != N)
+ abort ();
+ }
+ else
+ {
+ if (s1 != N)
+ abort ();
+ if (s2 != N*N)
+ abort ();
+ }
+
+ s1 = 0;
+
+#pragma acc parallel num_gangs (10) reduction (+:s1) copy(s1)
+ {
+#pragma acc loop gang reduction (+:s1)
+ for (i = 0; i < 10; i++)
+ s1++;
+ }
+
+ if (s1 != N)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c
new file mode 100644
index 0000000..6d52222
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c
@@ -0,0 +1,13 @@
+int
+main ()
+{
+ int i;
+ static int temp;
+
+#pragma acc parallel reduction(+:temp)
+ {
+ temp++;
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c
new file mode 100644
index 0000000..af629c8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c
@@ -0,0 +1,20 @@
+#define N 32
+
+int
+foo (unsigned int sum)
+{
+#pragma acc parallel reduction (+:sum)
+ {
+ sum;
+ }
+
+ return sum;
+}
+
+int
+main (void)
+{
+ unsigned int sum = 0;
+ foo (sum);
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
index e557931..10eb2788 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c
@@ -1,46 +1,59 @@
/* { dg-do run } */
+/* Ignore vector_length warnings for offloaded (nvptx) targets. */
+/* { dg-additional-options "-foffload=-w" } */
+
/* Integer reductions. */
#include <stdlib.h>
-#include <stdbool.h>
-
-#define vl 32
-
-#define DO_PRAGMA(x) _Pragma (#x)
-
-#define check_reduction_op(type, op, init, b) \
- { \
- type res, vres; \
- res = (init); \
- DO_PRAGMA (acc parallel vector_length (vl) copy(res)) \
-DO_PRAGMA (acc loop reduction (op:res))\
- for (i = 0; i < n; i++) \
- res = res op (b); \
- \
- vres = (init); \
- for (i = 0; i < n; i++) \
- vres = vres op (b); \
- \
- if (res != vres) \
- abort (); \
- }
+#include "reduction.h"
+
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
static void
-test_reductions_int (void)
+test_reductions (void)
{
- const int n = 1000;
+ const int n = 100;
int i;
int array[n];
for (i = 0; i < n; i++)
- array[i] = i;
-
- check_reduction_op (int, +, 0, array[i]);
- check_reduction_op (int, *, 1, array[i]);
- check_reduction_op (int, &, -1, array[i]);
- check_reduction_op (int, |, 0, array[i]);
- check_reduction_op (int, ^, 0, array[i]);
+ array[i] = i+1;
+
+ /* Gang reductions. */
+ check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang);
+ check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang);
+ check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang);
+ check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang);
+ check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang);
+
+ /* Worker reductions. */
+ check_reduction_op (int, +, 0, array[i], num_workers (nw), worker);
+ check_reduction_op (int, *, 1, array[i], num_workers (nw), worker);
+ check_reduction_op (int, &, -1, array[i], num_workers (nw), worker);
+ check_reduction_op (int, |, 0, array[i], num_workers (nw), worker);
+ check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker);
+
+ /* Vector reductions. */
+ check_reduction_op (int, +, 0, array[i], vector_length (vl), vector);
+ check_reduction_op (int, *, 1, array[i], vector_length (vl), vector);
+ check_reduction_op (int, &, -1, array[i], vector_length (vl), vector);
+ check_reduction_op (int, |, 0, array[i], vector_length (vl), vector);
+ check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector);
+
+ /* Combined reductions. */
+ check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
}
static void
@@ -55,29 +68,31 @@ test_reductions_bool (void)
array[i] = i;
cmp_val = 5;
- check_reduction_op (bool, &&, true, (cmp_val > array[i]));
- check_reduction_op (bool, ||, false, (cmp_val > array[i]));
-}
-#define check_reduction_macro(type, op, init, b) \
- { \
- type res, vres; \
- res = (init); \
-DO_PRAGMA (acc parallel vector_length (vl) copy(res))\
-DO_PRAGMA (acc loop reduction (op:res))\
- for (i = 0; i < n; i++) \
- res = op (res, (b)); \
- \
- vres = (init); \
- for (i = 0; i < n; i++) \
- vres = op (vres, (b)); \
- \
- if (res != vres) \
- abort (); \
- }
-
-#define max(a, b) (((a) > (b)) ? (a) : (b))
-#define min(a, b) (((a) < (b)) ? (a) : (b))
+ /* Gang reductions. */
+ check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng),
+ gang);
+ check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng),
+ gang);
+
+ /* Worker reductions. */
+ check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw),
+ worker);
+ check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw),
+ worker);
+
+ /* Vector reductions. */
+ check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl),
+ vector);
+ check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl),
+ vector);
+
+ /* Combined reductions. */
+ check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker vector);
+ check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker vector);
+}
static void
test_reductions_minmax (void)
@@ -89,14 +104,32 @@ test_reductions_minmax (void)
for (i = 0; i < n; i++)
array[i] = i;
- check_reduction_macro (int, min, n + 1, array[i]);
- check_reduction_macro (int, max, -1, array[i]);
+ /* Gang reductions. */
+ check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang);
+ check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang);
+
+ /* Worker reductions. */
+ check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker);
+ check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker);
+
+ /* Vector reductions. */
+ check_reduction_macro (int, min, n + 1, array[i], vector_length (vl),
+ vector);
+ check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector);
+
+ /* Combined reductions. */
+ check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+ check_reduction_macro (int, max, -1, array[i], num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
}
int
main (void)
{
- test_reductions_int ();
+ test_reductions ();
test_reductions_bool ();
test_reductions_minmax ();
return 0;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
index 8a0b0d6..7cb9497 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c
@@ -1,124 +1,83 @@
/* { dg-do run } */
+/* Ignore vector_length warnings for offloaded (nvptx) targets. */
+/* { dg-additional-options "-foffload=-w" } */
+
/* float reductions. */
#include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
+#include "reduction.h"
-#define vl 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
-int
-main(void)
+static void
+test_reductions (void)
{
- const int n = 1000;
+ const int n = 100;
int i;
- float vresult, result, array[n];
- bool lvresult, lresult;
+ float array[n];
for (i = 0; i < n; i++)
- array[i] = i;
-
- result = 0;
- vresult = 0;
+ array[i] = i+1;
- /* '+' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (+:result)
- for (i = 0; i < n; i++)
- result += array[i];
+ /* Gang reductions. */
+ check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang);
+ check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang);
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult += array[i];
-
- if (result != vresult)
- abort ();
-
- result = 0;
- vresult = 0;
-
- /* '*' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (*:result)
- for (i = 0; i < n; i++)
- result *= array[i];
+ /* Worker reductions. */
+ check_reduction_op (float, +, 0, array[i], num_workers (nw), worker);
+ check_reduction_op (float, *, 1, array[i], num_workers (nw), worker);
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult *= array[i];
-
- if (fabs(result - vresult) > .0001)
- abort ();
- result = 0;
- vresult = 0;
-
- /* 'max' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (max:result)
- for (i = 0; i < n; i++)
- result = result > array[i] ? result : array[i];
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult = vresult > array[i] ? vresult : array[i];
-
- if (result != vresult)
- abort ();
-
- result = 0;
- vresult = 0;
-
- /* 'min' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (min:result)
- for (i = 0; i < n; i++)
- result = result < array[i] ? result : array[i];
+ /* Vector reductions. */
+ check_reduction_op (float, +, 0, array[i], vector_length (vl), vector);
+ check_reduction_op (float, *, 1, array[i], vector_length (vl), vector);
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult = vresult < array[i] ? vresult : array[i];
-
- if (result != vresult)
- abort ();
-
- result = 5;
- vresult = 5;
-
- lresult = false;
- lvresult = false;
-
- /* '&&' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (&&:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult && (result > array[i]);
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- lvresult = lresult && (result > array[i]);
-
- if (lresult != lvresult)
- abort ();
-
- result = 5;
- vresult = 5;
-
- lresult = false;
- lvresult = false;
+ /* Combined reductions. */
+ check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+}
- /* '||' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (||:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult || (result > array[i]);
+static void
+test_reductions_minmax (void)
+{
+ const int n = 1000;
+ int i;
+ float array[n];
- /* Verify the reduction. */
for (i = 0; i < n; i++)
- lvresult = lresult || (result > array[i]);
+ array[i] = i;
- if (lresult != lvresult)
- abort ();
+ /* Gang reductions. */
+ check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang);
+ check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang);
+
+ /* Worker reductions. */
+ check_reduction_macro (float, min, n + 1, array[i], num_workers (nw),
+ worker);
+ check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker);
+
+ /* Vector reductions. */
+ check_reduction_macro (float, min, n + 1, array[i], vector_length (vl),
+ vector);
+ check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector);
+
+ /* Combined reductions. */
+ check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+ check_reduction_macro (float, max, -1, array[i], num_gangs (ng)
+ num_workers (nw)vector_length (vl), gang worker
+ vector);
+}
+int
+main (void)
+{
+ test_reductions ();
+ test_reductions_minmax ();
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
index a233e29..1b948be 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c
@@ -1,93 +1,84 @@
/* { dg-do run } */
+/* Ignore vector_length warnings for offloaded (nvptx) targets. */
+/* { dg-additional-options "-foffload=-w" } */
+
/* double reductions. */
#include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
+#include "reduction.h"
-#define vl 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
-int
-main(void)
+static void
+test_reductions (void)
{
- const int n = 1000;
+ const int n = 10;
int i;
- double vresult, result, array[n];
- bool lvresult, lresult;
-
- for (i = 0; i < n; i++)
- array[i] = i;
-
- result = 0;
- vresult = 0;
-
- /* 'max' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (max:result)
- for (i = 0; i < n; i++)
- result = result > array[i] ? result : array[i];
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- vresult = vresult > array[i] ? vresult : array[i];
-
- if (result != vresult)
- abort ();
-
- result = 0;
- vresult = 0;
-
- /* 'min' reductions. */
-#pragma acc parallel vector_length (vl) copy(result)
-#pragma acc loop reduction (min:result)
- for (i = 0; i < n; i++)
- result = result < array[i] ? result : array[i];
+ double array[n];
- /* Verify the reduction. */
for (i = 0; i < n; i++)
- vresult = vresult < array[i] ? vresult : array[i];
+ array[i] = i+1;
- if (result != vresult)
- abort ();
+ /* Gang reductions. */
+ check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang);
+ check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang);
- result = 5;
- vresult = 5;
+ /* Worker reductions. */
+ check_reduction_op (double, +, 0, array[i], num_workers (nw), worker);
+ check_reduction_op (double, *, 1, array[i], num_workers (nw), worker);
- lresult = false;
- lvresult = false;
+ /* Vector reductions. */
+ check_reduction_op (double, +, 0, array[i], vector_length (vl), vector);
+ check_reduction_op (double, *, 1, array[i], vector_length (vl), vector);
- /* '&&' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (&&:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult && (result > array[i]);
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- lvresult = lresult && (result > array[i]);
-
- if (lresult != lvresult)
- abort ();
-
- result = 5;
- vresult = 5;
-
- lresult = false;
- lvresult = false;
+ /* Combined reductions. */
+ check_reduction_op (double, +, 0, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+ check_reduction_op (double, *, 1, array[i], num_gangs (ng) num_workers (nw)
+ vector_length (vl), gang worker vector);
+}
- /* '||' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (||:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult || (result > array[i]);
+static void
+test_reductions_minmax (void)
+{
+ const int n = 1000;
+ int i;
+ double array[n];
- /* Verify the reduction. */
for (i = 0; i < n; i++)
- lvresult = lresult || (result > array[i]);
+ array[i] = i;
- if (lresult != lvresult)
- abort ();
+ /* Gang reductions. */
+ check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang);
+ check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang);
+
+ /* Worker reductions. */
+ check_reduction_macro (double, min, n + 1, array[i], num_workers (nw),
+ worker);
+ check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker);
+
+ /* Vector reductions. */
+ check_reduction_macro (double, min, n + 1, array[i], vector_length (vl),
+ vector);
+ check_reduction_macro (double, max, -1, array[i], vector_length (vl),
+ vector);
+
+ /* Combined reductions. */
+ check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+ check_reduction_macro (double, max, -1, array[i], num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+}
+int
+main (void)
+{
+ test_reductions ();
+ test_reductions_minmax ();
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
index 59d49c1..79355ed 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c
@@ -1,59 +1,56 @@
/* { dg-do run { target { ! { hppa*-*-hpux* } } } } */
+/* Ignore vector_length warnings for offloaded (nvptx) targets. */
+/* { dg-additional-options "-foffload=-w" } */
+
/* complex reductions. */
#include <stdlib.h>
-#include <stdbool.h>
-#include <math.h>
#include <complex.h>
+#include "reduction.h"
-#define vl 32
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
-int
-main(void)
+static void
+test_reductions (void)
{
- const int n = 1000;
+ const int n = 10;
int i;
- double _Complex vresult, result, array[n];
- bool lvresult, lresult;
-
- for (i = 0; i < n; i++)
- array[i] = i;
-
- result = 0;
- vresult = 0;
-
- /* '&&' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (&&:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult && (creal(result) > creal(array[i]));
-
- /* Verify the reduction. */
- for (i = 0; i < n; i++)
- lvresult = lresult && (creal(result) > creal(array[i]));
+ double _Complex array[n];
- if (lresult != lvresult)
- abort ();
-
- result = 5;
- vresult = 5;
-
- lresult = false;
- lvresult = false;
-
- /* '||' reductions. */
-#pragma acc parallel vector_length (vl) copy(lresult)
-#pragma acc loop reduction (||:lresult)
- for (i = 0; i < n; i++)
- lresult = lresult || (creal(result) > creal(array[i]));
-
- /* Verify the reduction. */
for (i = 0; i < n; i++)
- lvresult = lresult || (creal(result) > creal(array[i]));
-
- if (lresult != lvresult)
- abort ();
+ array[i] = i+1;
+
+ /* Gang reductions. */
+ check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang);
+ check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang);
+
+ /* Worker reductions. */
+ check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw),
+ worker);
+ check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw),
+ worker);
+
+ /* Vector reductions. */
+ check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl),
+ vector);
+ check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl),
+ vector);
+
+ /* Combined reductions. */
+ check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+ check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng)
+ num_workers (nw) vector_length (vl), gang worker
+ vector);
+}
+int
+main (void)
+{
+ test_reductions ();
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
index efe8702..46b553a 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c
@@ -1,32 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-w" } */
+
+/* Ignore vector_length warnings for offloaded (nvptx) targets. */
+/* { dg-additional-options "-foffload=-w" } */
+
+/* Multiple reductions. */
+
#include <stdio.h>
#include <stdlib.h>
+const int ng = 8;
+const int nw = 4;
+const int vl = 32;
+
+const int n = 100;
+
+#define DO_PRAGMA(x) _Pragma (#x)
+
+/* Run a two-variable '+' reduction with parallel clauses GWV_PAR and loop
+   clauses GWV_LOOP, then abort unless both results equal the host reference
+   values v1 and v2.  Relies on s1, s2, v1, v2, i and n being visible where
+   the macro is expanded.  The per-iteration increments (3 and 2) must match
+   the reference loop in main, and either mismatch alone is a failure.  */
+#define check_reduction(gwv_par, gwv_loop)		\
+  {							\
+    s1 = 2; s2 = 5;					\
+DO_PRAGMA (acc parallel gwv_par copy (s1, s2))		\
+DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2))	\
+    for (i = 0; i < n; i++)				\
+      {							\
+        s1 = s1 + 3;					\
+        s2 = s2 + 2;					\
+      }							\
+							\
+    if (s1 != v1 || s2 != v2)				\
+      abort ();						\
+  }
+
int
main (void)
{
int s1 = 2, s2 = 5, v1 = 2, v2 = 5;
- int n = 100;
int i;
-#pragma acc parallel vector_length (32) copy(s1,s2)
-#pragma acc loop reduction (+:s1, s2)
- for (i = 0; i < n; i++)
- {
- s1 = s1 + 3;
- s2 = s2 + 2;
- }
-
for (i = 0; i < n; i++)
{
v1 = v1 + 3;
v2 = v2 + 2;
}
-
- if (s1 != v1)
- abort ();
-
- if (s2 != v2)
- abort ();
-
+
+ check_reduction (num_gangs (ng), gang);
+
+ /* Nvptx targets require a vector_length of 32 in order to allow spinlocks
+ with gangs. */
+ check_reduction (num_workers (nw) vector_length (vl), worker);
+ check_reduction (vector_length (vl), vector);
+ check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang
+ worker vector);
+
return 0;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
new file mode 100644
index 0000000..af30b31
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-additional-options "-w" } */
+
+/* Test reductions on explicitly private variables. */
+
+#include <assert.h>
+
+/* Fill red[] on the device and verify it on the host.  Each iteration j of
+   the gang loop seeds its private v with j, a vector loop with a '+'
+   reduction on v then increments it 100 times, and the result is stored in
+   red[j].  Every red[j] is therefore expected to equal j + 100.  */
+int
+main ()
+{
+ int i, j, red[10];
+ int v;
+
+ /* Pre-fill with a value the device code never stores.  */
+ for (i = 0; i < 10; i++)
+ red[i] = -1;
+
+#pragma acc parallel copyout(red)
+ {
+#pragma acc loop gang private(v)
+ for (j = 0; j < 10; j++)
+ {
+ v = j;
+
+ /* Reduction on a variable that is private to the enclosing gang loop.  */
+#pragma acc loop vector reduction (+:v)
+ for (i = 0; i < 100; i++)
+ v++;
+
+ red[j] = v;
+ }
+ }
+
+ for (i = 0; i < 10; i++)
+ assert (red[i] == i + 100);
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
new file mode 100644
index 0000000..1b3f8d4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h
@@ -0,0 +1,43 @@
+/* Helper macros shared by the reduction tests.  The check macros use `i',
+   `n' and `abort' from the expansion site; this header declares none of
+   them.  */
+#ifndef REDUCTION_H
+#define REDUCTION_H
+
+/* Stringify X and hand it to _Pragma so a macro can emit a pragma.  */
+#define DO_PRAGMA(x) _Pragma (#x)
+
+/* Fold B into an accumulator of type TYPE with the infix operator OP, starting
+   from INIT, for i in [0, n): once inside an OpenACC parallel region carrying
+   the clauses GWV_PAR with a loop carrying the clauses GWV_LOOP and
+   reduction (OP:res), and once sequentially on the host.  Abort if the two
+   results differ.  */
+#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop) \
+ { \
+ type res, vres; \
+ res = (init); \
+DO_PRAGMA (acc parallel gwv_par copy (res)) \
+DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
+ for (i = 0; i < n; i++) \
+ res = res op (b); \
+ \
+ vres = (init); \
+ for (i = 0; i < n; i++) \
+ vres = vres op (b); \
+ \
+ if (res != vres) \
+ abort (); \
+ }
+
+/* Same as check_reduction_op, except that OP is applied in call form,
+   OP (acc, B), as needed for the min and max macros below.  */
+#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop) \
+ { \
+ type res, vres; \
+ res = (init); \
+ DO_PRAGMA (acc parallel gwv_par copy(res)) \
+DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \
+ for (i = 0; i < n; i++) \
+ res = op (res, (b)); \
+ \
+ vres = (init); \
+ for (i = 0; i < n; i++) \
+ vres = op (vres, (b)); \
+ \
+ if (res != vres) \
+ abort (); \
+ }
+
+/* Function-like max/min; each argument may be evaluated more than once.  */
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
+#endif
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90
new file mode 100644
index 0000000..31db7e1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90
@@ -0,0 +1,47 @@
+! { dg-do run }
+! { dg-additional-options "-w" }
+
+! Test '+' reductions placed directly on the parallel construct, first in the
+! main program and then inside a subroutine.  After each region both
+! reduction variables are checked on every device type.
+program reduction
+  implicit none
+  integer, parameter :: n = 10
+  integer s1, s2
+  include "openacc_lib.h"
+
+  s1 = 0
+  s2 = 0
+
+  ! s1 has an explicit copy clause; s2 has no data clause of its own.
+  !$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1)
+  s1 = s1 + 1
+  s2 = s2 + 1
+  !$acc end parallel
+
+  ! The checks expect n increments on nvidia devices and one elsewhere.
+  if (acc_get_device_type () .eq. acc_device_nvidia) then
+     if (s1 .ne. n) call abort
+     if (s2 .ne. n) call abort
+  else
+     if (s1 .ne. 1) call abort
+     if (s2 .ne. 1) call abort
+  end if
+
+  ! Test reductions inside subroutines
+
+  s1 = 0
+  s2 = 0
+  call redsub (s1, s2, n)
+
+  ! Check both variables in both branches, mirroring the checks above, so
+  ! that a wrong s2 on nvidia or a wrong s1 elsewhere cannot go unnoticed.
+  if (acc_get_device_type () .eq. acc_device_nvidia) then
+     if (s1 .ne. n) call abort
+     if (s2 .ne. n) call abort
+  else
+     if (s1 .ne. 1) call abort
+     if (s2 .ne. 1) call abort
+  end if
+end program reduction
+
+! Run a parallel '+' reduction on the dummy arguments s1 and s2; the region
+! body adds 1 to each.  The requested gang count is taken from n.
+subroutine redsub(s1, s2, n)
+  implicit none
+  integer, intent(inout) :: s1, s2
+  integer, intent(in) :: n
+
+  !$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1)
+  s1 = s1 + 1
+  s2 = s2 + 1
+  !$acc end parallel
+end subroutine redsub
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90
new file mode 100644
index 0000000..63bde44
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90
@@ -0,0 +1,20 @@
+! Reproducer for PR 70289 (Fortran variant).  No results are checked.
+program foo
+ implicit none
+ integer :: i
+ ! Both variables are initialized in their declarations, which gives them
+ ! the SAVE attribute.
+ integer :: temp = 0
+ integer :: temp2 = 0
+
+ ! Gang loop with temp listed as private.
+ !$acc parallel
+ !$acc loop gang private(temp)
+ do i=1, 10000
+ temp = 0
+ enddo
+ !$acc end parallel
+
+ ! '+' reduction on temp2 on both the parallel construct and the gang loop.
+ !$acc parallel reduction(+:temp2)
+ !$acc loop gang reduction(+:temp2)
+ do i=1, 10000
+ temp2 = 0
+ enddo
+ !$acc end parallel
+end program foo
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
index db0a52d..e51509f 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90
@@ -1,28 +1,55 @@
! { dg-do run }
+! { dg-additional-options "-w" }
! Integer reductions
program reduction_1
implicit none
- integer, parameter :: n = 10, vl = 32
- integer :: i, vresult, result
- logical :: lresult, lvresult
+ integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
+ integer :: i, vresult, rg, rw, rv, rc
+ logical :: lrg, lrw, lrv, lrc, lvresult
integer, dimension (n) :: array
do i = 1, n
array(i) = i
end do
- result = 0
+ !
+ ! '+' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! '+' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(+:rg) gang
+ do i = 1, n
+ rg = rg + array(i)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(+:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(+:rw) worker
do i = 1, n
- result = result + array(i)
+ rw = rw + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(+:rv) vector
+ do i = 1, n
+ rv = rv + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(+:rc) gang worker vector
+ do i = 1, n
+ rc = rc + array(i)
end do
!$acc end parallel
@@ -31,17 +58,46 @@ program reduction_1
vresult = vresult + array(i)
end do
- if (result.ne.vresult) call abort
-
- result = 0
- vresult = 0
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+ !
! '*' reductions
+ !
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(*:result)
+ rg = 1
+ rw = 1
+ rv = 1
+ rc = 1
+ vresult = 1
+
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(*:rg) gang
do i = 1, n
- result = result * array(i)
+ rg = rg * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(*:rw) worker
+ do i = 1, n
+ rw = rw * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(*:rv) vector
+ do i = 1, n
+ rv = rv * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(*:rc) gang worker vector
+ do i = 1, n
+ rc = rc * array(i)
end do
!$acc end parallel
@@ -50,17 +106,46 @@ program reduction_1
vresult = vresult * array(i)
end do
- if (result.ne.vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+
+ !
+ ! 'max' reductions
+ !
- result = 0
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! 'max' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(max:rg) gang
+ do i = 1, n
+ rg = max (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(max:rw) worker
+ do i = 1, n
+ rw = max (rw, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(max:rv) vector
+ do i = 1, n
+ rv = max (rv, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(max:result)
+ !$acc parallel num_gangs(ng) Num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(max:rc) gang worker vector
do i = 1, n
- result = max (result, array(i))
+ rc = max (rc, array(i))
end do
!$acc end parallel
@@ -69,17 +154,46 @@ program reduction_1
vresult = max (vresult, array(i))
end do
- if (result.ne.vresult) call abort
-
- result = 1
- vresult = 1
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+ !
! 'min' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
+ vresult = 0
+
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(min:rg) gang
+ do i = 1, n
+ rg = min (rg, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(min:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(min:rw) worker
do i = 1, n
- result = min (result, array(i))
+ rw = min (rw, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(min:rv) vector
+ do i = 1, n
+ rv = min (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(min:rc) gang worker vector
+ do i = 1, n
+ rc = min (rc, array(i))
end do
!$acc end parallel
@@ -88,17 +202,46 @@ program reduction_1
vresult = min (vresult, array(i))
end do
- if (result.ne.vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+
+ !
+ ! 'iand' reductions
+ !
- result = 1
+ rg = 1
+ rw = 1
+ rv = 1
+ rc = 1
vresult = 1
- ! 'iand' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(iand:rg) gang
+ do i = 1, n
+ rg = iand (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(iand:rw) worker
+ do i = 1, n
+ rw = iand (rw, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(iand:rv) vector
+ do i = 1, n
+ rv = iand (rv, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(iand:result)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(iand:rc) gang worker vector
do i = 1, n
- result = iand (result, array(i))
+ rc = iand (rc, array(i))
end do
!$acc end parallel
@@ -107,17 +250,46 @@ program reduction_1
vresult = iand (vresult, array(i))
end do
- if (result.ne.vresult) call abort
-
- result = 1
- vresult = 1
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+ !
! 'ior' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
+ vresult = 0
+
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(ior:rg) gang
+ do i = 1, n
+ rg = ior (rg, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(ior:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(ior:rw) worker
do i = 1, n
- result = ior (result, array(i))
+ rw = ior (rw, array(i))
+ end do
+ !$acc end parallel
+
+  ! Vector-level 'ior' reduction: the vector_length region uses a vector
+  ! loop, like the rv test of every other operator in this file.
+  !$acc parallel vector_length(vl) copy(rv)
+  !$acc loop reduction(ior:rv) vector
+  do i = 1, n
+     rv = ior (rv, array(i))
+  end do
+  !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(ior:rc) gang worker vector
+ do i = 1, n
+ rc = ior (rc, array(i))
end do
!$acc end parallel
@@ -126,17 +298,46 @@ program reduction_1
vresult = ior (vresult, array(i))
end do
- if (result.ne.vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
- result = 0
+ !
+ ! 'ieor' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! 'ieor' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(ieor:rg) gang
+ do i = 1, n
+ rg = ieor (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(ieor:rw) worker
+ do i = 1, n
+ rw = ieor (rw, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(ieor:result)
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(ieor:rv) vector
do i = 1, n
- result = ieor (result, array(i))
+ rv = ieor (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(ieor:rc) gang worker vector
+ do i = 1, n
+ rc = ieor (rc, array(i))
end do
!$acc end parallel
@@ -145,17 +346,46 @@ program reduction_1
vresult = ieor (vresult, array(i))
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+ !
! '.and.' reductions
+ !
+
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.and.:lrg) gang
+ do i = 1, n
+ lrg = lrg .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.and.:lrw) worker
+ do i = 1, n
+ lrw = lrw .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.and.:lrv) vector
+ do i = 1, n
+ lrv = lrv .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.and.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.and.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .and. (array(i) .ge. 5)
+ lrc = lrc .and. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -164,17 +394,46 @@ program reduction_1
lvresult = lvresult .and. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+
+ !
+ ! '.or.' reductions
+ !
- lresult = .false.
+ lrg = .false. ! '.or.' identity; a .true. seed would make the result trivially true
+ lrw = .false.
+ lrv = .false.
+ lrc = .false.
lvresult = .false.
- ! '.or.' reductions
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.or.:lrg) gang
+ do i = 1, n
+ lrg = lrg .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.or.:lrw) worker
+ do i = 1, n
+ lrw = lrw .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.or.:lrv) vector
+ do i = 1, n
+ lrv = lrv .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.or.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.or.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .or. (array(i) .ge. 5)
+ lrc = lrc .or. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -183,17 +442,46 @@ program reduction_1
lvresult = lvresult .or. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.eqv.' reductions
+ !
+
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.eqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.eqv.:lrw) worker
+ do i = 1, n
+ lrw = lrw .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.eqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.eqv.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.eqv.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .eqv. (array(i) .ge. 5)
+ lrc = lrc .eqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -202,17 +490,46 @@ program reduction_1
lvresult = lvresult .eqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.neqv.' reductions
+ !
+
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.neqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.neqv.:lrw) worker
+ do i = 1, n
+ lrw = lrw .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.neqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.neqv.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.neqv.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .neqv. (array(i) .ge. 5)
+ lrc = lrc .neqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -221,5 +538,8 @@ program reduction_1
lvresult = lvresult .neqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
end program reduction_1
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
index 96955ce..b828feb 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90
@@ -5,26 +5,52 @@
program reduction_2
implicit none
- integer, parameter :: n = 10, vl = 32
+ integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
integer :: i
- real, parameter :: e = .001
- real :: vresult, result
- logical :: lresult, lvresult
- real, dimension (n) :: array
+ real :: vresult, rg, rw, rv, rc
+ real, parameter :: e = 0.001
+ logical :: lrg, lrw, lrv, lrc, lvresult
+ real, dimension (n) :: array
do i = 1, n
array(i) = i
end do
- result = 0
+ !
+ ! '+' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! '+' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(+:rg) gang
+ do i = 1, n
+ rg = rg + array(i)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(+:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(+:rw) worker
do i = 1, n
- result = result + array(i)
+ rw = rw + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(+:rv) vector
+ do i = 1, n
+ rv = rv + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(+:rc) gang worker vector
+ do i = 1, n
+ rc = rc + array(i)
end do
!$acc end parallel
@@ -33,17 +59,46 @@ program reduction_2
vresult = vresult + array(i)
end do
- if (abs (result - vresult) .ge. e) call abort
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+
+ !
+ ! '*' reductions
+ !
- result = 1
+ rg = 1
+ rw = 1
+ rv = 1
+ rc = 1
vresult = 1
- ! '*' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(*:rg) gang
+ do i = 1, n
+ rg = rg * array(i)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(*:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(*:rw) worker
do i = 1, n
- result = result * array(i)
+ rw = rw * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(*:rv) vector
+ do i = 1, n
+ rv = rv * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(*:rc) gang worker vector
+ do i = 1, n
+ rc = rc * array(i)
end do
!$acc end parallel
@@ -52,17 +107,46 @@ program reduction_2
vresult = vresult * array(i)
end do
- if (result.ne.vresult) call abort
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+
+ !
+ ! 'max' reductions
+ !
- result = 0
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! 'max' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(max:rg) gang
+ do i = 1, n
+ rg = max (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(max:rw) worker
+ do i = 1, n
+ rw = max (rw, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(max:result)
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(max:rv) vector
do i = 1, n
- result = max (result, array(i))
+ rv = max (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(max:rc) gang worker vector
+ do i = 1, n
+ rc = max (rc, array(i))
end do
!$acc end parallel
@@ -71,17 +155,46 @@ program reduction_2
vresult = max (vresult, array(i))
end do
- if (result.ne.vresult) call abort
-
- result = 1
- vresult = 1
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+ !
! 'min' reductions
+ !
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(min:result)
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
+ vresult = 0
+
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(min:rg) gang
+ do i = 1, n
+ rg = min (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(min:rw) worker
+ do i = 1, n
+ rw = min (rw, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(min:rv) vector
do i = 1, n
- result = min (result, array(i))
+ rv = min (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(min:rc) gang worker vector
+ do i = 1, n
+ rc = min (rc, array(i))
end do
!$acc end parallel
@@ -90,17 +203,46 @@ program reduction_2
vresult = min (vresult, array(i))
end do
- if (result.ne.vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+
+ !
+ ! '.and.' reductions
+ !
- lresult = .true.
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
lvresult = .true.
- ! '.and.' reductions
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.and.:lrg) gang
+ do i = 1, n
+ lrg = lrg .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.and.:lresult)
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.and.:lrw) worker
do i = 1, n
- lresult = lresult .and. (array(i) .ge. 5)
+ lrw = lrw .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.and.:lrv) vector
+ do i = 1, n
+ lrv = lrv .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.and.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .and. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -109,17 +251,46 @@ program reduction_2
lvresult = lvresult .and. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+
+ !
+ ! '.or.' reductions
+ !
- lresult = .false.
+ lrg = .false.
+ lrw = .false.
+ lrv = .false.
+ lrc = .false.
lvresult = .false.
- ! '.or.' reductions
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.or.:lrg) gang
+ do i = 1, n
+ lrg = lrg .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.or.:lrw) worker
+ do i = 1, n
+ lrw = lrw .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.or.:lresult)
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.or.:lrv) vector
do i = 1, n
- lresult = lresult .or. (array(i) .ge. 5)
+ lrv = lrv .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.or.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .or. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -128,17 +299,46 @@ program reduction_2
lvresult = lvresult .or. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.eqv.' reductions
+ !
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.eqv.:lresult)
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.eqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.eqv.:lrw) worker
do i = 1, n
- lresult = lresult .eqv. (array(i) .ge. 5)
+ lrw = lrw .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.eqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.eqv.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .eqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -147,17 +347,46 @@ program reduction_2
lvresult = lvresult .eqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.neqv.' reductions
+ !
+
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.neqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.neqv.:lrw) worker
+ do i = 1, n
+ lrw = lrw .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.neqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.neqv.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.neqv.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .neqv. (array(i) .ge. 5)
+ lrc = lrc .neqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -166,5 +395,8 @@ program reduction_2
lvresult = lvresult .neqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
end program reduction_2
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
index ecf7fbe..3d8d753 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90
@@ -5,26 +5,52 @@
program reduction_3
implicit none
- integer, parameter :: n = 10, vl = 32
+ integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
integer :: i
- double precision, parameter :: e = .001
- double precision :: vresult, result
- logical :: lresult, lvresult
+ double precision :: vresult, rg, rw, rv, rc
+ double precision, parameter :: e = 0.001
+ logical :: lrg, lrw, lrv, lrc, lvresult
double precision, dimension (n) :: array
do i = 1, n
array(i) = i
end do
- result = 0
+ !
+ ! '+' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! '+' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(+:rg) gang
+ do i = 1, n
+ rg = rg + array(i)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(+:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(+:rw) worker
do i = 1, n
- result = result + array(i)
+ rw = rw + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(+:rv) vector
+ do i = 1, n
+ rv = rv + array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(+:rc) gang worker vector
+ do i = 1, n
+ rc = rc + array(i)
end do
!$acc end parallel
@@ -33,17 +59,46 @@ program reduction_3
vresult = vresult + array(i)
end do
- if (abs (result - vresult) .ge. e) call abort
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+
+ !
+ ! '*' reductions
+ !
- result = 1
+ rg = 1
+ rw = 1
+ rv = 1
+ rc = 1
vresult = 1
- ! '*' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(*:rg) gang
+ do i = 1, n
+ rg = rg * array(i)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(*:result)
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(*:rw) worker
do i = 1, n
- result = result * array(i)
+ rw = rw * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(*:rv) vector
+ do i = 1, n
+ rv = rv * array(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(*:rc) gang worker vector
+ do i = 1, n
+ rc = rc * array(i)
end do
!$acc end parallel
@@ -52,17 +107,46 @@ program reduction_3
vresult = vresult * array(i)
end do
- if (result.ne.vresult) call abort
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+
+ !
+ ! 'max' reductions
+ !
- result = 0
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! 'max' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(max:rg) gang
+ do i = 1, n
+ rg = max (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(max:rw) worker
+ do i = 1, n
+ rw = max (rw, array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(max:result)
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(max:rv) vector
do i = 1, n
- result = max (result, array(i))
+ rv = max (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(max:rc) gang worker vector
+ do i = 1, n
+ rc = max (rc, array(i))
end do
!$acc end parallel
@@ -71,17 +155,46 @@ program reduction_3
vresult = max (vresult, array(i))
end do
- if (result.ne.vresult) call abort
-
- result = 1
- vresult = 1
+ if (abs (rg - vresult) .ge. e) call abort
+ if (abs (rw - vresult) .ge. e) call abort
+ if (abs (rv - vresult) .ge. e) call abort
+ if (abs (rc - vresult) .ge. e) call abort
+ !
! 'min' reductions
+ !
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(min:result)
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
+ vresult = 0
+
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(min:rg) gang
+ do i = 1, n
+ rg = min (rg, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(min:rw) worker
+ do i = 1, n
+ rw = min (rw, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(min:rv) vector
do i = 1, n
- result = min (result, array(i))
+ rv = min (rv, array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(min:rc) gang worker vector
+ do i = 1, n
+ rc = min (rc, array(i))
end do
!$acc end parallel
@@ -90,17 +203,46 @@ program reduction_3
vresult = min (vresult, array(i))
end do
- if (result.ne.vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
+
+ !
+ ! '.and.' reductions
+ !
- lresult = .true.
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
lvresult = .true.
- ! '.and.' reductions
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.and.:lrg) gang
+ do i = 1, n
+ lrg = lrg .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.and.:lresult)
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.and.:lrw) worker
do i = 1, n
- lresult = lresult .and. (array(i) .ge. 5)
+ lrw = lrw .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.and.:lrv) vector
+ do i = 1, n
+ lrv = lrv .and. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.and.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .and. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -109,17 +251,46 @@ program reduction_3
lvresult = lvresult .and. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+
+ !
+ ! '.or.' reductions
+ !
- lresult = .false.
+ lrg = .false.
+ lrw = .false.
+ lrv = .false.
+ lrc = .false.
lvresult = .false.
- ! '.or.' reductions
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.or.:lrg) gang
+ do i = 1, n
+ lrg = lrg .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.or.:lrw) worker
+ do i = 1, n
+ lrw = lrw .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.or.:lresult)
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.or.:lrv) vector
do i = 1, n
- lresult = lresult .or. (array(i) .ge. 5)
+ lrv = lrv .or. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.or.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .or. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -128,17 +299,46 @@ program reduction_3
lvresult = lvresult .or. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.eqv.' reductions
+ !
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.eqv.:lresult)
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.eqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.eqv.:lrw) worker
do i = 1, n
- lresult = lresult .eqv. (array(i) .ge. 5)
+ lrw = lrw .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.eqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .eqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.eqv.:lrc) gang worker vector
+ do i = 1, n
+ lrc = lrc .eqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -147,17 +347,46 @@ program reduction_3
lvresult = lvresult .eqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
-
- lresult = .false.
- lvresult = .false.
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
+ !
! '.neqv.' reductions
+ !
+
+ lrg = .true.
+ lrw = .true.
+ lrv = .true.
+ lrc = .true.
+ lvresult = .true.
+
+ !$acc parallel num_gangs(ng) copy(lrg)
+ !$acc loop reduction(.neqv.:lrg) gang
+ do i = 1, n
+ lrg = lrg .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(lrw)
+ !$acc loop reduction(.neqv.:lrw) worker
+ do i = 1, n
+ lrw = lrw .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(lrv)
+ !$acc loop reduction(.neqv.:lrv) vector
+ do i = 1, n
+ lrv = lrv .neqv. (array(i) .ge. 5)
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(lresult)
- !$acc loop reduction(.neqv.:lresult)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc)
+ !$acc loop reduction(.neqv.:lrc) gang worker vector
do i = 1, n
- lresult = lresult .neqv. (array(i) .ge. 5)
+ lrc = lrc .neqv. (array(i) .ge. 5)
end do
!$acc end parallel
@@ -166,5 +395,8 @@ program reduction_3
lvresult = lvresult .neqv. (array(i) .ge. 5)
end do
- if (result.ne.vresult) call abort
+ if (lrg .neqv. lvresult) call abort
+ if (lrw .neqv. lvresult) call abort
+ if (lrv .neqv. lvresult) call abort
+ if (lrc .neqv. lvresult) call abort
end program reduction_3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
index 8c99fdb..c3bdaf6 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90
@@ -5,50 +5,108 @@
program reduction_4
implicit none
- integer, parameter :: n = 10, vl = 32
+ integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32
integer :: i
- complex :: vresult, result
+ real :: vresult, rg, rw, rv, rc
complex, dimension (n) :: array
do i = 1, n
array(i) = i
end do
- result = 0
+ !
+ ! '+' reductions
+ !
+
+ rg = 0
+ rw = 0
+ rv = 0
+ rc = 0
vresult = 0
- ! '+' reductions
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(+:rg) gang
+ do i = 1, n
+ rg = rg + REAL(array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(+:rw) worker
+ do i = 1, n
+ rw = rw + REAL(array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(+:rv) vector
+ do i = 1, n
+ rv = rv + REAL(array(i))
+ end do
+ !$acc end parallel
- !$acc parallel vector_length(vl) num_gangs(1) copy(result)
- !$acc loop reduction(+:result)
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(+:rc) gang worker vector
do i = 1, n
- result = result + array(i)
+ rc = rc + REAL(array(i))
end do
!$acc end parallel
! Verify the results
do i = 1, n
- vresult = vresult + array(i)
+ vresult = vresult + REAL(array(i))
end do
- if (result .ne. vresult) call abort
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
- result = 1
+ !
+ ! '*' reductions
+ !
+
+ rg = 1
+ rw = 1
+ rv = 1
+ rc = 1
vresult = 1
-! ! '*' reductions
-!
-! !$acc parallel vector_length(vl)
-! !$acc loop reduction(*:result)
-! do i = 1, n
-! result = result * array(i)
-! end do
-! !$acc end parallel
-!
-! ! Verify the results
-! do i = 1, n
-! vresult = vresult * array(i)
-! end do
-!
-! if (result.ne.vresult) call abort
+ !$acc parallel num_gangs(ng) copy(rg)
+ !$acc loop reduction(*:rg) gang
+ do i = 1, n
+ rg = rg * REAL(array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_workers(nw) copy(rw)
+ !$acc loop reduction(*:rw) worker
+ do i = 1, n
+ rw = rw * REAL(array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length(vl) copy(rv)
+ !$acc loop reduction(*:rv) vector
+ do i = 1, n
+ rv = rv * REAL(array(i))
+ end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc)
+ !$acc loop reduction(*:rc) gang worker vector
+ do i = 1, n
+ rc = rc * REAL(array(i))
+ end do
+ !$acc end parallel
+
+ ! Verify the results
+ do i = 1, n
+ vresult = vresult * REAL(array(i))
+ end do
+
+ if (rg .ne. vresult) call abort
+ if (rw .ne. vresult) call abort
+ if (rv .ne. vresult) call abort
+ if (rc .ne. vresult) call abort
end program reduction_4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
index ec13e4e..4210648 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
@@ -1,12 +1,17 @@
! { dg-do run }
+! { dg-additional-options "-w" }
! subroutine reduction
program reduction
integer, parameter :: n = 40, c = 10
- integer :: i, vsum, sum
+ integer :: i, vsum, gs, ws, vs, cs, ns
- call redsub (sum, n, c)
+ call redsub_gang (gs, n, c)
+ call redsub_worker (ws, n, c)
+ call redsub_vector (vs, n, c)
+ call redsub_combined (cs, n, c)
+ call redsub_nested (ns, n, c)
vsum = 0
@@ -15,21 +20,80 @@ program reduction
vsum = vsum + c
end do
- if (sum.ne.vsum) call abort ()
+ if (gs .ne. vsum) call abort ()
+ if (ws .ne. vsum) call abort ()
+ if (vs .ne. vsum) call abort ()
+ if (cs .ne. vsum) call abort ()
+ if (ns .ne. vsum) call abort ()
end program reduction
-subroutine redsub(sum, n, c)
+subroutine redsub_gang(sum, n, c) ! gang-level '+' reduction; result is returned in sum
integer :: sum, n, c
- integer :: s
- s = 0
+ sum = 0 ! overwrite the caller's value with the '+' identity before reducing
- !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1)
- !$acc loop reduction(+:s)
+ !$acc parallel copyin (n, c) num_gangs(n) copy(sum)
+ !$acc loop reduction(+:sum) gang
do i = 1, n
- s = s + c
+ sum = sum + c ! adds c once per iteration
end do
!$acc end parallel
+end subroutine redsub_gang
- sum = s
-end subroutine redsub
+subroutine redsub_worker(sum, n, c) ! worker-level '+' reduction; result is returned in sum
+ integer :: sum, n, c
+
+ sum = 0 ! overwrite the caller's value with the '+' identity before reducing
+
+ !$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum)
+ !$acc loop reduction(+:sum) worker
+ do i = 1, n
+ sum = sum + c ! adds c once per iteration
+ end do
+ !$acc end parallel
+end subroutine redsub_worker
+
+subroutine redsub_vector(sum, n, c) ! vector-level '+' reduction; result is returned in sum
+ integer :: sum, n, c
+
+ sum = 0 ! overwrite the caller's value with the '+' identity before reducing
+
+ !$acc parallel copyin (n, c) vector_length(32) copy(sum)
+ !$acc loop reduction(+:sum) vector
+ do i = 1, n
+ sum = sum + c ! adds c once per iteration
+ end do
+ !$acc end parallel
+end subroutine redsub_vector
+
+subroutine redsub_combined(sum, n, c) ! '+' reduction on one loop marked gang worker vector; result in sum
+ integer :: sum, n, c
+
+ sum = 0 ! overwrite the caller's value with the '+' identity before reducing
+
+ !$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum)
+ !$acc loop reduction(+:sum) gang worker vector
+ do i = 1, n
+ sum = sum + c ! adds c once per iteration
+ end do
+ !$acc end parallel
+end subroutine redsub_combined
+
+subroutine redsub_nested(sum, n, c) ! nested '+' reduction: gang outer loop, vector inner loop; result in sum
+ integer :: sum, n, c
+ integer :: ii, jj
+
+ ii = n / 10; ! outer trip count (integer division)
+ jj = 10; ! inner trip count; ii*jj equals n only when n is a multiple of 10
+ sum = 0 ! overwrite the caller's value with the '+' identity before reducing
+
+ !$acc parallel num_gangs (8) copy(sum)
+ !$acc loop reduction(+:sum) gang
+ do i = 1, ii
+ !$acc loop reduction(+:sum) vector
+ do j = 1, jj
+ sum = sum + c ! executed ii*jj times in total
+ end do
+ end do
+ !$acc end parallel
+end subroutine redsub_nested
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
index 2ff6f5f..f3ed275 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90
@@ -1,30 +1,94 @@
! { dg-do run }
+! { dg-additional-options "-cpp -w" }
program reduction
implicit none
- integer, parameter :: n = 100
- integer :: i, s1, s2, vs1, vs2
+ integer, parameter :: n = 100, n2 = 1000, chunksize = 10
+ integer :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2
+ integer :: j, red, vred
- s1 = 0
- s2 = 0
+ gs1 = 0
+ gs2 = 0
+ ws1 = 0
+ ws2 = 0
vs1 = 0
vs2 = 0
+ cs1 = 0
+ cs2 = 0
+ hs1 = 0
+ hs2 = 0
- !$acc parallel vector_length (32) copy(s1, s2)
- !$acc loop reduction(+:s1, s2)
+ !$acc parallel num_gangs (1000) copy(gs1, gs2)
+ !$acc loop reduction(+:gs1, gs2) gang
do i = 1, n
- s1 = s1 + 1
- s2 = s2 + 2
+ gs1 = gs1 + 1
+ gs2 = gs2 + 2
end do
!$acc end parallel
- ! Verify the results
+ !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2)
+ !$acc loop reduction(+:ws1, ws2) worker
+ do i = 1, n
+ ws1 = ws1 + 1
+ ws2 = ws2 + 2
+ end do
+ !$acc end parallel
+
+ !$acc parallel vector_length (32) copy(vs1, vs2)
+ !$acc loop reduction(+:vs1, vs2) vector
do i = 1, n
vs1 = vs1 + 1
vs2 = vs2 + 2
end do
+ !$acc end parallel
+
+ !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2)
+ !$acc loop reduction(+:cs1, cs2) gang worker vector
+ do i = 1, n
+ cs1 = cs1 + 1
+ cs2 = cs2 + 2
+ end do
+ !$acc end parallel
+
+ ! Verify the results on the host
+ do i = 1, n
+ hs1 = hs1 + 1
+ hs2 = hs2 + 2
+ end do
+
+ if (gs1 .ne. hs1) call abort ()
+ if (gs2 .ne. hs2) call abort ()
+
+ if (ws1 .ne. hs1) call abort ()
+ if (ws2 .ne. hs2) call abort ()
+
+ if (vs1 .ne. hs1) call abort ()
+ if (vs2 .ne. hs2) call abort ()
+
+ if (cs1 .ne. hs1) call abort ()
+ if (cs2 .ne. hs2) call abort ()
+
+ ! Nested reductions.
+
+ red = 0
+ vred = 0
+
+ !$acc parallel num_gangs(10) vector_length(32) copy(red)
+ !$acc loop reduction(+:red) gang
+ do i = 1, n/chunksize
+ !$acc loop reduction(+:red) vector
+ do j = 1, chunksize
+ red = red + chunksize
+ end do
+ end do
+ !$acc end parallel
+
+ do i = 1, n/chunksize
+ do j = 1, chunksize
+ vred = vred + chunksize
+ end do
+ end do
- if (s1.ne.vs1) call abort ()
- if (s2.ne.vs2) call abort ()
+ if (red .ne. vred) call abort ()
end program reduction
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90
new file mode 100644
index 0000000..8ec36ad
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90
@@ -0,0 +1,88 @@
+! { dg-do run }
+! { dg-additional-options "-w" }
+
+! subroutine reduction with private and firstprivate variables
+
+program reduction
+ integer, parameter :: n = 100
+ integer :: i, j, vsum, cs, arr(n)
+
+ call redsub_private (cs, n, arr)
+ call redsub_bogus (cs, n)
+ call redsub_combined (cs, n, arr)
+
+ vsum = 0
+
+ ! Verify the results
+ do i = 1, n
+ vsum = i
+ do j = 1, n
+ vsum = vsum + 1;
+ end do
+ if (vsum .ne. arr(i)) call abort ()
+ end do
+end program reduction
+
+! This subroutine tests a reduction with an explicit private variable.
+
+subroutine redsub_private(sum, n, arr)
+ integer :: sum, n, arr(n)
+ integer :: i, j, v
+
+ !$acc parallel copyout (arr)
+ !$acc loop gang private (v)
+ do j = 1, n
+ v = j
+
+ !$acc loop vector reduction (+:v)
+ do i = 1, 100
+ v = v + 1
+ end do
+
+ arr(j) = v
+ end do
+ !$acc end parallel
+
+ ! verify the results
+ do i = 1, 10
+ if (arr(i) .ne. 100+i) call abort ()
+ end do
+end subroutine redsub_private
+
+
+! Bogus reduction on an implicitly firstprivate variable. The results do
+! survive the parallel region. The goal here is to ensure that gfortran
+! doesn't ICE.
+
+subroutine redsub_bogus(sum, n)
+ integer :: sum, n, arr(n)
+ integer :: i
+
+ !$acc parallel
+ !$acc loop gang worker vector reduction (+:sum)
+ do i = 1, n
+ sum = sum + 1
+ end do
+ !$acc end parallel
+end subroutine redsub_bogus
+
+! This reduction involving a firstprivate variable yields legitimate results.
+
+subroutine redsub_combined(sum, n, arr)
+ integer :: sum, n, arr(n)
+ integer :: i, j
+
+ !$acc parallel copy (arr)
+ !$acc loop gang
+ do i = 1, n
+ sum = i;
+
+ !$acc loop reduction(+:sum)
+ do j = 1, n
+ sum = sum + 1
+ end do
+
+ arr(i) = sum
+ end do
+ !$acc end parallel
+end subroutine redsub_combined