diff options
author | Cesar Philippidis <cesar@codesourcery.com> | 2016-04-08 14:09:47 -0700 |
---|---|---|
committer | Cesar Philippidis <cesar@gcc.gnu.org> | 2016-04-08 14:09:47 -0700 |
commit | c42cfb5ca3b02756705485e013fa9107aaf28acd (patch) | |
tree | 354e1cd54c889228c92a0a83560a5bf53a7aab00 /libgomp/testsuite | |
parent | 51a4b0f18711b23f2f696a4f546ccce5b1653cf5 (diff) | |
download | gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.zip gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.tar.gz gcc-c42cfb5ca3b02756705485e013fa9107aaf28acd.tar.bz2 |
re PR lto/70289 ([openacc] ICE in input_varpool_node)
gcc/
PR lto/70289
PR ipa/70348
PR tree-optimization/70373
PR middle-end/70533
PR middle-end/70534
PR middle-end/70535
* gimplify.c (gimplify_adjust_omp_clauses): Add or adjust data
clauses for acc parallel reductions as necessary. Error on those
that are private.
* omp-low.c (scan_sharing_clauses): Don't install variables which
are used in acc parallel reductions.
(lower_rec_input_clauses): Remove dead code.
(lower_oacc_reductions): Add support for reference reductions.
(lower_reduction_clauses): Remove dead code.
(lower_omp_target): Don't remap variables appearing in acc parallel
reductions.
* tree.h (OMP_CLAUSE_MAP_IN_REDUCTION): New macro.
gcc/testsuite/
* c-c++-common/goacc/reduction-5.c: New test.
* c-c++-common/goacc/reduction-promotions.c: New test.
* gfortran.dg/goacc/reduction-3.f95: New test.
* gfortran.dg/goacc/reduction-promotions.f90: New test.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c: New test.
* testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c: New test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c: New
test.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c: Add test
coverage.
* testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70289.c: New test.
* testsuite/libgomp.oacc-c-c++-common/pr70373.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction-1.c: Add test
coverage.
* testsuite/libgomp.oacc-c-c++-common/reduction-2.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-5.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/reduction-6.c: New test.
* testsuite/libgomp.oacc-c-c++-common/reduction.h: New test.
* testsuite/libgomp.oacc-fortran/parallel-reduction.f90: New test.
* testsuite/libgomp.oacc-fortran/pr70289.f90: New test.
* testsuite/libgomp.oacc-fortran/reduction-1.f90: Add test coverage.
* testsuite/libgomp.oacc-fortran/reduction-2.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-3.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-4.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-5.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-6.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reduction-7.f90: New test.
From-SVN: r234840
Diffstat (limited to 'libgomp/testsuite')
39 files changed, 2559 insertions, 530 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c new file mode 100644 index 0000000..55ab3c9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gang-np-1.c @@ -0,0 +1,45 @@ +/* { dg-additional-options "-w" } */ + +#include <assert.h> + +/* Test of reduction on loop directive (gangs, non-private reduction + variable). */ + +int +main (int argc, char *argv[]) +{ + int i, arr[1024], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[i]; + } + + for (i = 0; i < 1024; i++) + hres += arr[i]; + + assert (res == hres); + + res = hres = 1; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang reduction(*:res) + for (i = 0; i < 12; i++) + res *= arr[i]; + } + + for (i = 0; i < 12; i++) + hres *= arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c new file mode 100644 index 0000000..d4341e9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gv-np-1.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-w" } */ + +#include <assert.h> + +/* Test of reduction on loop directive (gangs and vectors, non-private + reduction variable). */ + +int +main (int argc, char *argv[]) +{ + int i, arr[1024], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[i]; + } + + for (i = 0; i < 1024; i++) + hres += arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c new file mode 100644 index 0000000..2e5668b --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gw-np-1.c @@ -0,0 +1,30 @@ +/* { dg-additional-options "-w" } */ + +#include <assert.h> + +/* Test of reduction on loop directive (gangs and workers, non-private + reduction variable). */ + +int +main (int argc, char *argv[]) +{ + int i, arr[1024], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang worker reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[i]; + } + + for (i = 0; i < 1024; i++) + hres += arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c new file mode 100644 index 0000000..d610373 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-1.c @@ -0,0 +1,28 @@ +#include <assert.h> + +/* Test of reduction on loop directive (gangs, workers and vectors, non-private + reduction variable). */ + +int +main (int argc, char *argv[]) +{ + int i, arr[1024], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[i]; + } + + for (i = 0; i < 1024; i++) + hres += arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c new file mode 100644 index 0000000..ea5c151 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-2.c @@ -0,0 +1,34 @@ +#include <assert.h> + +/* Test of reduction on loop directive (gangs, workers and vectors, non-private + reduction variable: separate gang and worker/vector loops). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[32768], res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res) + { + #pragma acc loop gang reduction(+:res) + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + i]; + } + /* "res" is non-private, and is not available until after the parallel + region. */ + } + + for (i = 0; i < 32768; i++) + hres += arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c new file mode 100644 index 0000000..0056f3c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-3.c @@ -0,0 +1,33 @@ +#include <assert.h> + +/* Test of reduction on loop directive (gangs, workers and vectors, non-private + reduction variable: separate gang and worker/vector loops). */ + +int +main (int argc, char *argv[]) +{ + int i, j; + double arr[32768], res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copyin(arr) copy(res) + { + #pragma acc loop gang reduction(+:res) + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + i]; + } + } + + for (i = 0; i < 32768; i++) + hres += arr[i]; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c new file mode 100644 index 0000000..e69d0ec --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-gwv-np-4.c @@ -0,0 +1,55 @@ +#include <assert.h> + +/* Test of reduction on loop directive (gangs, workers and vectors, multiple + non-private reduction variables, float type). */ + +int +main (int argc, char *argv[]) +{ + int i, j; + float arr[32768]; + float res = 0, mres = 0, hres = 0, hmres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(res, mres) + { + #pragma acc loop gang reduction(+:res) reduction(max:mres) + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(+:res) reduction(max:mres) + for (i = 0; i < 1024; i++) + { + res += arr[j * 1024 + i]; + if (arr[j * 1024 + i] > mres) + mres = arr[j * 1024 + i]; + } + + #pragma acc loop worker vector reduction(+:res) reduction(max:mres) + for (i = 0; i < 1024; i++) + { + res += arr[j * 1024 + (1023 - i)]; + if (arr[j * 1024 + (1023 - i)] > mres) + mres = arr[j * 1024 + (1023 - i)]; + } + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 1024; i++) + { + hres += arr[j * 1024 + i]; + hres += arr[j * 1024 + (1023 - i)]; + if (arr[j * 1024 + i] > hmres) + hmres = arr[j * 1024 + i]; + if (arr[j * 1024 + (1023 - i)] > hmres) + hmres = arr[j * 1024 + (1023 - i)]; + } + + assert (res == hres); + assert (mres == hmres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c new file mode 100644 index 0000000..31e4366 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-1.c @@ -0,0 +1,43 @@ +/* { dg-additional-options "-w" } */ + +#include <assert.h> + +/* Test of reduction on loop directive (vectors, private reduction + variable). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[1024], out[32], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(res) copyout(out) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + res = 0; + + #pragma acc loop vector reduction(+:res) + for (i = 0; i < 32; i++) + res += arr[j * 32 + i]; + + out[j] = res; + } + } + + for (j = 0; j < 32; j++) + { + hres = 0; + + for (i = 0; i < 32; i++) + hres += arr[j * 32 + i]; + + assert (out[j] == hres); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c new file mode 100644 index 0000000..15f0053 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-vector-p-2.c @@ -0,0 +1,41 @@ +#include <assert.h> + +/* Test of reduction on loop directive (vector reduction in + gang-partitioned/worker-partitioned mode, private reduction variable). */ + +int +main (int argc, char *argv[]) +{ + int i, j, k; + double ina[1024], inb[1024], out[1024], acc; + + for (j = 0; j < 32; j++) + for (i = 0; i < 32; i++) + { + ina[j * 32 + i] = (i == j) ? 2.0 : 0.0; + inb[j * 32 + i] = (double) (i + j); + } + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(acc) copyin(ina, inb) copyout(out) + { + #pragma acc loop gang worker + for (k = 0; k < 32; k++) + for (j = 0; j < 32; j++) + { + acc = 0; + + #pragma acc loop vector reduction(+:acc) + for (i = 0; i < 32; i++) + acc += ina[k * 32 + i] * inb[i * 32 + j]; + + out[k * 32 + j] = acc; + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 32; i++) + assert (out[j * 32 + i] == (i + j) * 2); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c new file mode 100644 index 0000000..4a92503 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-worker-p-1.c @@ -0,0 +1,43 @@ +/* { dg-additional-options "-w" } */ + +#include <assert.h> + +/* Test of reduction on loop directive (workers, private reduction + variable). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[1024], out[32], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(res) copyout(out) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + res = 0; + + #pragma acc loop worker reduction(+:res) + for (i = 0; i < 32; i++) + res += arr[j * 32 + i]; + + out[j] = res; + } + } + + for (j = 0; j < 32; j++) + { + hres = 0; + + for (i = 0; i < 32; i++) + hres += arr[j * 32 + i]; + + assert (out[j] == hres); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c new file mode 100644 index 0000000..1bfb284 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-1.c @@ -0,0 +1,41 @@ +#include <assert.h> + +/* Test of reduction on loop directive (workers and vectors, private reduction + variable). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[1024], out[32], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(res) copyout(out) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + res = 0; + + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 32; i++) + res += arr[j * 32 + i]; + + out[j] = res; + } + } + + for (j = 0; j < 32; j++) + { + hres = 0; + + for (i = 0; i < 32; i++) + hres += arr[j * 32 + i]; + + assert (out[j] == hres); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c new file mode 100644 index 0000000..93ab78f --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-2.c @@ -0,0 +1,45 @@ +#include <assert.h> + +/* Test of reduction on loop directive (workers and vectors, private reduction + variable). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[32768], out[32], res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(res) copyout(out) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + res = j; + + #pragma acc loop worker reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + i]; + + #pragma acc loop vector reduction(+:res) + for (i = 1023; i >= 0; i--) + res += arr[j * 1024 + i]; + + out[j] = res; + } + } + + for (j = 0; j < 32; j++) + { + hres = j; + + for (i = 0; i < 1024; i++) + hres += arr[j * 1024 + i] * 2; + + assert (out[j] == hres); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c new file mode 100644 index 0000000..298e25c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-reduction-wv-p-3.c @@ -0,0 +1,38 @@ +#include <assert.h> + +/* Test of reduction on loop directive (workers and vectors, private reduction + variable: gang-redundant mode). */ + +int +main (int argc, char *argv[]) +{ + int i, arr[1024], out[32], res = 0, hres = 0; + + for (i = 0; i < 1024; i++) + arr[i] = i ^ 33; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + private(res) copyin(arr) copyout(out) + { + /* Private variables aren't initialized by default in openacc. */ + res = 0; + + /* "res" should be available at the end of the following loop (and should + have the same value redundantly in each gang). */ + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[i]; + + #pragma acc loop gang (static: 1) + for (i = 0; i < 32; i++) + out[i] = res; + } + + for (i = 0; i < 1024; i++) + hres += arr[i]; + + for (i = 0; i < 32; i++) + assert (out[i] == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c new file mode 100644 index 0000000..5e82e1d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-1.c @@ -0,0 +1,38 @@ +#include <assert.h> + +/* Test of reduction on both parallel and loop directives (worker and + vector-partitioned loops individually in gang-partitioned mode, int + type). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[32768], res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + reduction(+:res) copy(res) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + #pragma acc loop worker reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + i]; + + #pragma acc loop vector reduction(+:res) + for (i = 1023; i >= 0; i--) + res += arr[j * 1024 + i]; + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 1024; i++) + hres += arr[j * 1024 + i] * 2; + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c new file mode 100644 index 0000000..a7a75a9 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-2.c @@ -0,0 +1,40 @@ +#include <assert.h> + +/* Test of reduction on both parallel and loop directives (workers and vectors + in gang-partitioned mode, int type with XOR). */ + +int +main (int argc, char *argv[]) +{ + int i, j, arr[32768], res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + reduction(^:res) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(^:res) + for (i = 0; i < 1024; i++) + res ^= arr[j * 1024 + i]; + + #pragma acc loop worker vector reduction(^:res) + for (i = 0; i < 1024; i++) + res ^= arr[j * 1024 + (1023 - i)]; + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 1024; i++) + { + hres ^= arr[j * 1024 + i]; + hres ^= arr[j * 1024 + (1023 - i)]; + } + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c new file mode 100644 index 0000000..8d85fed --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-3.c @@ -0,0 +1,42 @@ +#include <assert.h> + +/* Test of reduction on both parallel and loop directives (workers and vectors + together in gang-partitioned mode, float type). */ + +int +main (int argc, char *argv[]) +{ + int i, j; + float arr[32768]; + float res = 0, hres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + reduction(+:res) copy(res) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + i]; + + #pragma acc loop worker vector reduction(+:res) + for (i = 0; i < 1024; i++) + res += arr[j * 1024 + (1023 - i)]; + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 1024; i++) + { + hres += arr[j * 1024 + i]; + hres += arr[j * 1024 + (1023 - i)]; + } + + assert (res == hres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c new file mode 100644 index 0000000..1904b4a --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-loop-comb-reduction-4.c @@ -0,0 +1,55 @@ +#include <assert.h> + +/* Test of reduction on both parallel and loop directives (workers and vectors + together in gang-partitioned mode, float type, multiple reductions). */ + +int +main (int argc, char *argv[]) +{ + int i, j; + float arr[32768]; + float res = 0, mres = 0, hres = 0, hmres = 0; + + for (i = 0; i < 32768; i++) + arr[i] = i; + + #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + reduction(+:res) reduction(max:mres) copy(res, mres) + { + #pragma acc loop gang + for (j = 0; j < 32; j++) + { + #pragma acc loop worker vector reduction(+:res) reduction(max:mres) + for (i = 0; i < 1024; i++) + { + res += arr[j * 1024 + i]; + if (arr[j * 1024 + i] > mres) + mres = arr[j * 1024 + i]; + } + + #pragma acc loop worker vector reduction(+:res) reduction(max:mres) + for (i = 0; i < 1024; i++) + { + res += arr[j * 1024 + (1023 - i)]; + if (arr[j * 1024 + (1023 - i)] > mres) + mres = arr[j * 1024 + (1023 - i)]; + } + } + } + + for (j = 0; j < 32; j++) + for (i = 0; i < 1024; i++) + { + hres += arr[j * 1024 + i]; + hres += arr[j * 1024 + (1023 - i)]; + if (arr[j * 1024 + i] > hmres) + hmres = arr[j * 1024 + i]; + if (arr[j * 1024 + (1023 - i)] > hmres) + hmres = arr[j * 1024 + (1023 - i)]; + } + + assert (res == hres); + assert (mres == hmres); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c index dceac39..a88b60f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-1.c @@ -1,40 +1,54 @@ +/* { dg-additional-options "-w" } */ + #include <assert.h> +/* Test of reduction on parallel directive. */ + +#define ACTUAL_GANGS 256 + int main (int argc, char *argv[]) { - int res, res2 = 0; + int res, res1 = 0, res2 = 0; #if defined(ACC_DEVICE_TYPE_host) # define GANGS 1 #else # define GANGS 256 #endif - #pragma acc parallel num_gangs(GANGS) copy(res2) + #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ + reduction(+:res1) copy(res2, res1) { + res1 += 5; + #pragma acc atomic res2 += 5; } res = GANGS * 5; + assert (res == res1); assert (res == res2); #undef GANGS - res = res2 = 1; + res = res1 = res2 = 1; #if defined(ACC_DEVICE_TYPE_host) # define GANGS 1 #else # define GANGS 8 #endif - #pragma acc parallel num_gangs(GANGS) copy(res2) + #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ + reduction(*:res1) copy(res1, res2) { + res1 *= 5; + #pragma acc atomic res2 *= 5; } for (int i = 0; i < GANGS; ++i) res *= 5; + assert (res == res1); assert (res == res2); #undef GANGS diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c index bd5715c..911b76c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/par-reduction-2.c @@ -1,18 +1,25 @@ +/* { dg-additional-options "-w" } */ + #include <assert.h> #include <openacc.h> +/* Test of reduction on parallel directive (with async). */ + int main (int argc, char *argv[]) { - int res, res2 = 0; + int res, res1 = 0, res2 = 0; #if defined(ACC_DEVICE_TYPE_host) # define GANGS 1 #else # define GANGS 256 #endif - #pragma acc parallel num_gangs(GANGS) copy(res2) async(1) + #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ + reduction(+:res1) copy(res1, res2) async(1) { + res1 += 5; + #pragma acc atomic res2 += 5; } @@ -20,18 +27,22 @@ main (int argc, char *argv[]) acc_wait (1); + assert (res == res1); assert (res == res2); #undef GANGS - res = res2 = 1; + res = res1 = res2 = 1; #if defined(ACC_DEVICE_TYPE_host) # define GANGS 1 #else # define GANGS 8 #endif - #pragma acc parallel num_gangs(GANGS) copy(res2) async(1) + #pragma acc parallel num_gangs(GANGS) num_workers(32) vector_length(32) \ + reduction(*:res1) copy(res1, res2) async(1) { + res1 *= 5; + #pragma acc atomic res2 *= 5; } @@ -40,6 +51,7 @@ main (int argc, char *argv[]) acc_wait (1); + assert (res == res1); assert (res == res2); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 39357ce..f5766a4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -5,12 +5,20 @@ int main () { + int dummy[10]; + #pragma acc parallel num_workers (2<<20) /* { dg-error "using num_workers" } */ { +#pragma acc loop worker + for (int i = 0; i < 10; i++) + dummy[i] = i; } #pragma acc parallel vector_length (2<<20) /* { dg-error "using vector_length" } */ { +#pragma acc loop vector + for (int i = 0; i < 10; i++) + dummy[i] = i; } return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c new file mode 100644 index 0000000..b2c60e5 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-reduction.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-additional-options "-w" } */ + +#include <stdlib.h> +#include <openacc.h> + +#define N 10 + +int +main () +{ + int s1 = 0, s2 = 0; + int i; + int dummy = 0; + +#pragma acc data copy (dummy) + { +#pragma acc parallel num_gangs (N) reduction (+:s1) copy(s1) + { + s1++; + } + } + + if (acc_get_device_type () != acc_device_nvidia) + { + if (s1 != 1) + abort (); + } + else + { + if (s1 != N) + abort (); + } + + s1 = 0; + s2 = 0; + +#pragma acc parallel num_gangs (10) reduction (+:s1, s2) copy(s1, s2) + { + s1++; + s2 += N; + } + + if (acc_get_device_type () != acc_device_nvidia) + { + if (s1 != 1) + abort (); + if (s2 != N) + abort (); + } + else + { + if (s1 != N) + abort (); + if (s2 != N*N) + abort (); + } + + s1 = 0; + +#pragma acc parallel num_gangs (10) reduction (+:s1) copy(s1) + { +#pragma acc loop gang reduction (+:s1) + for (i = 0; i < 10; i++) + s1++; + } + + if (s1 != N) + abort (); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c new file mode 100644 index 0000000..6d52222 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70289.c @@ -0,0 +1,13 @@ +int +main () +{ + int i; + static int temp; + +#pragma acc parallel reduction(+:temp) + { + temp++; + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c new file mode 100644 index 0000000..af629c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr70373.c @@ -0,0 +1,20 @@ +#define N 32 + +int +foo (unsigned int sum) +{ +#pragma acc parallel reduction (+:sum) + { + sum; + } + + return sum; +} + +int +main (void) +{ + unsigned int sum = 0; + foo (sum); + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c index e557931..10eb2788 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-1.c @@ -1,46 +1,59 @@ /* { dg-do run } */ +/* Ignore vector_length warnings for offloaded (nvptx) targets. */ +/* { dg-additional-options "-foffload=-w" } */ + /* Integer reductions. */ #include <stdlib.h> -#include <stdbool.h> - -#define vl 32 - -#define DO_PRAGMA(x) _Pragma (#x) - -#define check_reduction_op(type, op, init, b) \ - { \ - type res, vres; \ - res = (init); \ - DO_PRAGMA (acc parallel vector_length (vl) copy(res)) \ -DO_PRAGMA (acc loop reduction (op:res))\ - for (i = 0; i < n; i++) \ - res = res op (b); \ - \ - vres = (init); \ - for (i = 0; i < n; i++) \ - vres = vres op (b); \ - \ - if (res != vres) \ - abort (); \ - } +#include "reduction.h" + +const int ng = 8; +const int nw = 4; +const int vl = 32; static void -test_reductions_int (void) +test_reductions (void) { - const int n = 1000; + const int n = 100; int i; int array[n]; for (i = 0; i < n; i++) - array[i] = i; - - check_reduction_op (int, +, 0, array[i]); - check_reduction_op (int, *, 1, array[i]); - check_reduction_op (int, &, -1, array[i]); - check_reduction_op (int, |, 0, array[i]); - check_reduction_op (int, ^, 0, array[i]); + array[i] = i+1; + + /* Gang reductions. */ + check_reduction_op (int, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (int, *, 1, array[i], num_gangs (ng), gang); + check_reduction_op (int, &, -1, array[i], num_gangs (ng), gang); + check_reduction_op (int, |, 0, array[i], num_gangs (ng), gang); + check_reduction_op (int, ^, 0, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_op (int, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (int, *, 1, array[i], num_workers (nw), worker); + check_reduction_op (int, &, -1, array[i], num_workers (nw), worker); + check_reduction_op (int, |, 0, array[i], num_workers (nw), worker); + check_reduction_op (int, ^, 0, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_op (int, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (int, *, 1, array[i], vector_length (vl), vector); + check_reduction_op (int, &, -1, array[i], vector_length (vl), vector); + check_reduction_op (int, |, 0, array[i], vector_length (vl), vector); + check_reduction_op (int, ^, 0, array[i], vector_length (vl), vector); + + /* Combined reductions. */ + check_reduction_op (int, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, &, -1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, |, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (int, ^, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); } static void @@ -55,29 +68,31 @@ test_reductions_bool (void) array[i] = i; cmp_val = 5; - check_reduction_op (bool, &&, true, (cmp_val > array[i])); - check_reduction_op (bool, ||, false, (cmp_val > array[i])); -} -#define check_reduction_macro(type, op, init, b) \ - { \ - type res, vres; \ - res = (init); \ -DO_PRAGMA (acc parallel vector_length (vl) copy(res))\ -DO_PRAGMA (acc loop reduction (op:res))\ - for (i = 0; i < n; i++) \ - res = op (res, (b)); \ - \ - vres = (init); \ - for (i = 0; i < n; i++) \ - vres = op (vres, (b)); \ - \ - if (res != vres) \ - abort (); \ - } - -#define max(a, b) (((a) > (b)) ? (a) : (b)) -#define min(a, b) (((a) < (b)) ? (a) : (b)) + /* Gang reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng), + gang); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng), + gang); + + /* Worker reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_workers (nw), + worker); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_workers (nw), + worker); + + /* Vector reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), vector_length (vl), + vector); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), vector_length (vl), + vector); + + /* Combined reductions. */ + check_reduction_op (int, &&, 1, (cmp_val > array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker vector); + check_reduction_op (int, ||, 0, (cmp_val > array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker vector); +} static void test_reductions_minmax (void) @@ -89,14 +104,32 @@ test_reductions_minmax (void) for (i = 0; i < n; i++) array[i] = i; - check_reduction_macro (int, min, n + 1, array[i]); - check_reduction_macro (int, max, -1, array[i]); + /* Gang reductions. */ + check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (int, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (int, min, n + 1, array[i], num_workers (nw), worker); + check_reduction_macro (int, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_macro (int, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (int, max, -1, array[i], vector_length (vl), vector); + + /* Combined reductions. */ + check_reduction_macro (int, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (int, max, -1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); } int main (void) { - test_reductions_int (); + test_reductions (); test_reductions_bool (); test_reductions_minmax (); return 0; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c index 8a0b0d6..7cb9497 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-2.c @@ -1,124 +1,83 @@ /* { dg-do run } */ +/* Ignore vector_length warnings for offloaded (nvptx) targets. */ +/* { dg-additional-options "-foffload=-w" } */ + /* float reductions. */ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> +#include "reduction.h" -#define vl 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 100; int i; - float vresult, result, array[n]; - bool lvresult, lresult; + float array[n]; for (i = 0; i < n; i++) - array[i] = i; - - result = 0; - vresult = 0; + array[i] = i+1; - /* '+' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (+:result) - for (i = 0; i < n; i++) - result += array[i]; + /* Gang reductions. */ + check_reduction_op (float, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (float, *, 1, array[i], num_gangs (ng), gang); - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult += array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* '*' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (*:result) - for (i = 0; i < n; i++) - result *= array[i]; + /* Worker reductions. */ + check_reduction_op (float, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (float, *, 1, array[i], num_workers (nw), worker); - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult *= array[i]; - - if (fabs(result - vresult) > .0001) - abort (); - result = 0; - vresult = 0; - - /* 'max' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (max:result) - for (i = 0; i < n; i++) - result = result > array[i] ? result : array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult > array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* 'min' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (min:result) - for (i = 0; i < n; i++) - result = result < array[i] ? result : array[i]; + /* Vector reductions. */ + check_reduction_op (float, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (float, *, 1, array[i], vector_length (vl), vector); - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult < array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; - - /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (&&:lresult) - for (i = 0; i < n; i++) - lresult = lresult && (result > array[i]); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lresult && (result > array[i]); - - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; + /* Combined reductions. */ + check_reduction_op (float, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (float, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); +} - /* '||' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (||:lresult) - for (i = 0; i < n; i++) - lresult = lresult || (result > array[i]); +static void +test_reductions_minmax (void) +{ + const int n = 1000; + int i; + float array[n]; - /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult || (result > array[i]); + array[i] = i; - if (lresult != lvresult) - abort (); + /* Gang reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (float, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_workers (nw), + worker); + check_reduction_macro (float, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_macro (float, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (float, max, -1, array[i], vector_length (vl), vector); + + /* Combined reductions. */ + check_reduction_macro (float, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (float, max, -1, array[i], num_gangs (ng) + num_workers (nw)vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); + test_reductions_minmax (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c index a233e29..1b948be 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-3.c @@ -1,93 +1,84 @@ /* { dg-do run } */ +/* Ignore vector_length warnings for offloaded (nvptx) targets. */ +/* { dg-additional-options "-foffload=-w" } */ + /* double reductions. */ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> +#include "reduction.h" -#define vl 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 10; int i; - double vresult, result, array[n]; - bool lvresult, lresult; - - for (i = 0; i < n; i++) - array[i] = i; - - result = 0; - vresult = 0; - - /* 'max' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (max:result) - for (i = 0; i < n; i++) - result = result > array[i] ? result : array[i]; - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - vresult = vresult > array[i] ? vresult : array[i]; - - if (result != vresult) - abort (); - - result = 0; - vresult = 0; - - /* 'min' reductions. */ -#pragma acc parallel vector_length (vl) copy(result) -#pragma acc loop reduction (min:result) - for (i = 0; i < n; i++) - result = result < array[i] ? result : array[i]; + double array[n]; - /* Verify the reduction. */ for (i = 0; i < n; i++) - vresult = vresult < array[i] ? vresult : array[i]; + array[i] = i+1; - if (result != vresult) - abort (); + /* Gang reductions. */ + check_reduction_op (double, +, 0, array[i], num_gangs (ng), gang); + check_reduction_op (double, *, 1, array[i], num_gangs (ng), gang); - result = 5; - vresult = 5; + /* Worker reductions. */ + check_reduction_op (double, +, 0, array[i], num_workers (nw), worker); + check_reduction_op (double, *, 1, array[i], num_workers (nw), worker); - lresult = false; - lvresult = false; + /* Vector reductions. */ + check_reduction_op (double, +, 0, array[i], vector_length (vl), vector); + check_reduction_op (double, *, 1, array[i], vector_length (vl), vector); - /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (&&:lresult) - for (i = 0; i < n; i++) - lresult = lresult && (result > array[i]); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lresult && (result > array[i]); - - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; + /* Combined reductions. */ + check_reduction_op (double, +, 0, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); + check_reduction_op (double, *, 1, array[i], num_gangs (ng) num_workers (nw) + vector_length (vl), gang worker vector); +} - /* '||' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (||:lresult) - for (i = 0; i < n; i++) - lresult = lresult || (result > array[i]); +static void +test_reductions_minmax (void) +{ + const int n = 1000; + int i; + double array[n]; - /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult || (result > array[i]); + array[i] = i; - if (lresult != lvresult) - abort (); + /* Gang reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng), gang); + check_reduction_macro (double, max, -1, array[i], num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_workers (nw), + worker); + check_reduction_macro (double, max, -1, array[i], num_workers (nw), worker); + + /* Vector reductions. */ + check_reduction_macro (double, min, n + 1, array[i], vector_length (vl), + vector); + check_reduction_macro (double, max, -1, array[i], vector_length (vl), + vector); + + /* Combined reductions. */ + check_reduction_macro (double, min, n + 1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_macro (double, max, -1, array[i], num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); + test_reductions_minmax (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c index 59d49c1..79355ed 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c @@ -1,59 +1,56 @@ /* { dg-do run { target { ! { hppa*-*-hpux* } } } } */ +/* Ignore vector_length warnings for offloaded (nvptx) targets. */ +/* { dg-additional-options "-foffload=-w" } */ + /* complex reductions. */ #include <stdlib.h> -#include <stdbool.h> -#include <math.h> #include <complex.h> +#include "reduction.h" -#define vl 32 +const int ng = 8; +const int nw = 4; +const int vl = 32; -int -main(void) +static void +test_reductions (void) { - const int n = 1000; + const int n = 10; int i; - double _Complex vresult, result, array[n]; - bool lvresult, lresult; - - for (i = 0; i < n; i++) - array[i] = i; - - result = 0; - vresult = 0; - - /* '&&' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (&&:lresult) - for (i = 0; i < n; i++) - lresult = lresult && (creal(result) > creal(array[i])); - - /* Verify the reduction. */ - for (i = 0; i < n; i++) - lvresult = lresult && (creal(result) > creal(array[i])); + double _Complex array[n]; - if (lresult != lvresult) - abort (); - - result = 5; - vresult = 5; - - lresult = false; - lvresult = false; - - /* '||' reductions. */ -#pragma acc parallel vector_length (vl) copy(lresult) -#pragma acc loop reduction (||:lresult) - for (i = 0; i < n; i++) - lresult = lresult || (creal(result) > creal(array[i])); - - /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult || (creal(result) > creal(array[i])); - - if (lresult != lvresult) - abort (); + array[i] = i+1; + + /* Gang reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng), gang); + check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng), gang); + + /* Worker reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), num_workers (nw), + worker); + check_reduction_op (double, *, 1, creal (array[i]), num_workers (nw), + worker); + + /* Vector reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), vector_length (vl), + vector); + check_reduction_op (double, *, 1, creal (array[i]), vector_length (vl), + vector); + + /* Combined reductions. */ + check_reduction_op (double, +, 0, creal (array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); + check_reduction_op (double, *, 1, creal (array[i]), num_gangs (ng) + num_workers (nw) vector_length (vl), gang worker + vector); +} +int +main (void) +{ + test_reductions (); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c index efe8702..46b553a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-5.c @@ -1,32 +1,57 @@ +/* { dg-do run } */ +/* { dg-additional-options "-w" } */ + +/* Ignore vector_length warnings for offloaded (nvptx) targets. */ +/* { dg-additional-options "-foffload=-w" } */ + +/* Multiple reductions. */ + #include <stdio.h> #include <stdlib.h> +const int ng = 8; +const int nw = 4; +const int vl = 32; + +const int n = 100; + +#define DO_PRAGMA(x) _Pragma (#x) + +#define check_reduction(gwv_par, gwv_loop) \ + { \ + s1 = 2; s2 = 5; \ +DO_PRAGMA (acc parallel gwv_par copy (s1, s2)) \ +DO_PRAGMA (acc loop gwv_loop reduction (+:s1, s2)) \ + for (i = 0; i < n; i++) \ + { \ + s1 = s1 + 3; \ + s2 = s2 + 5; \ + } \ + \ + if (s1 != v1 && s2 != v2) \ + abort (); \ + } + int main (void) { int s1 = 2, s2 = 5, v1 = 2, v2 = 5; - int n = 100; int i; -#pragma acc parallel vector_length (32) copy(s1,s2) -#pragma acc loop reduction (+:s1, s2) - for (i = 0; i < n; i++) - { - s1 = s1 + 3; - s2 = s2 + 2; - } - for (i = 0; i < n; i++) { v1 = v1 + 3; v2 = v2 + 2; } - - if (s1 != v1) - abort (); - - if (s2 != v2) - abort (); - + + check_reduction (num_gangs (ng), gang); + + /* Nvptx targets require a vector_length or 32 in to allow spinlocks with + gangs. */ + check_reduction (num_workers (nw) vector_length (vl), worker); + check_reduction (vector_length (vl), vector); + check_reduction (num_gangs (ng) num_workers (nw) vector_length (vl), gang + worker vector); + return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c new file mode 100644 index 0000000..af30b31 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-6.c @@ -0,0 +1,36 @@ +/* { dg-do run } */ +/* { dg-additional-options "-w" } */ + +/* Test reductions on explicitly private variables. */ + +#include <assert.h> + +int +main () +{ + int i, j, red[10]; + int v; + + for (i = 0; i < 10; i++) + red[i] = -1; + +#pragma acc parallel copyout(red) + { +#pragma acc loop gang private(v) + for (j = 0; j < 10; j++) + { + v = j; + +#pragma acc loop vector reduction (+:v) + for (i = 0; i < 100; i++) + v++; + + red[j] = v; + } + } + + for (i = 0; i < 10; i++) + assert (red[i] == i + 100); + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h new file mode 100644 index 0000000..1b3f8d4 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/reduction.h @@ -0,0 +1,43 @@ +#ifndef REDUCTION_H +#define REDUCTION_H + +#define DO_PRAGMA(x) _Pragma (#x) + +#define check_reduction_op(type, op, init, b, gwv_par, gwv_loop) \ + { \ + type res, vres; \ + res = (init); \ +DO_PRAGMA (acc parallel gwv_par copy (res)) \ +DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \ + for (i = 0; i < n; i++) \ + res = res op (b); \ + \ + vres = (init); \ + for (i = 0; i < n; i++) \ + vres = vres op (b); \ + \ + if (res != vres) \ + abort (); \ + } + +#define check_reduction_macro(type, op, init, b, gwv_par, gwv_loop) \ + { \ + type res, vres; \ + res = (init); \ + DO_PRAGMA (acc parallel gwv_par copy(res)) \ +DO_PRAGMA (acc loop gwv_loop reduction (op:res)) \ + for (i = 0; i < n; i++) \ + res = op (res, (b)); \ + \ + vres = (init); \ + for (i = 0; i < n; i++) \ + vres = op (vres, (b)); \ + \ + if (res != vres) \ + abort (); \ + } + +#define max(a, b) (((a) > (b)) ? (a) : (b)) +#define min(a, b) (((a) < (b)) ? (a) : (b)) + +#endif diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 new file mode 100644 index 0000000..31db7e1 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-reduction.f90 @@ -0,0 +1,47 @@ +! { dg-do run } +! { dg-additional-options "-w" } + +program reduction + implicit none + integer, parameter :: n = 10 + integer s1, s2 + include "openacc_lib.h" + + s1 = 0 + s2 = 0 + + !$acc parallel reduction(+:s1,s2) num_gangs (n) copy(s1) + s1 = s1 + 1 + s2 = s2 + 1 + !$acc end parallel + + if (acc_get_device_type () .eq. acc_device_nvidia) then + if (s1 .ne. n) call abort + if (s2 .ne. n) call abort + else + if (s1 .ne. 1) call abort + if (s2 .ne. 1) call abort + end if + + ! Test reductions inside subroutines + + s1 = 0 + s2 = 0 + call redsub (s1, s2, n) + + if (acc_get_device_type () .eq. acc_device_nvidia) then + if (s1 .ne. n) call abort + else + if (s2 .ne. 1) call abort + end if +end program reduction + +subroutine redsub(s1, s2, n) + implicit none + integer :: s1, s2, n + + !$acc parallel reduction(+:s1,s2) num_gangs (10) copy(s1) + s1 = s1 + 1 + s2 = s2 + 1 + !$acc end parallel +end subroutine redsub diff --git a/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90 b/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90 new file mode 100644 index 0000000..63bde44 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/pr70289.f90 @@ -0,0 +1,20 @@ +program foo + implicit none + integer :: i + integer :: temp = 0 + integer :: temp2 = 0 + + !$acc parallel + !$acc loop gang private(temp) + do i=1, 10000 + temp = 0 + enddo + !$acc end parallel + + !$acc parallel reduction(+:temp2) + !$acc loop gang reduction(+:temp2) + do i=1, 10000 + temp2 = 0 + enddo + !$acc end parallel +end program foo diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 index db0a52d..e51509f 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-1.f90 @@ -1,28 +1,55 @@ ! { dg-do run } +! { dg-additional-options "-w" } ! Integer reductions program reduction_1 implicit none - integer, parameter :: n = 10, vl = 32 - integer :: i, vresult, result - logical :: lresult, lvresult + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 + integer :: i, vresult, rg, rw, rv, rc + logical :: lrg, lrw, lrv, lrc, lvresult integer, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! '+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(+:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -31,17 +58,46 @@ program reduction_1 vresult = vresult + array(i) end do - if (result.ne.vresult) call abort - - result = 0 - vresult = 0 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! '*' reductions + ! - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(*:result) + rg = 1 + rw = 1 + rv = 1 + rc = 1 + vresult = 1 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang do i = 1, n - result = result * array(i) + rg = rg * array(i) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker + do i = 1, n + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -50,17 +106,46 @@ program reduction_1 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector + do i = 1, n + rv = max (rv, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(max:result) + !$acc parallel num_gangs(ng) Num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector do i = 1, n - result = max (result, array(i)) + rc = max (rc, array(i)) end do !$acc end parallel @@ -69,17 +154,46 @@ program reduction_1 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! 'min' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(min:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker do i = 1, n - result = min (result, array(i)) + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector + do i = 1, n + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -88,17 +202,46 @@ program reduction_1 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! 'iand' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! 'iand' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(iand:rg) gang + do i = 1, n + rg = iand (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(iand:rw) worker + do i = 1, n + rw = iand (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(iand:rv) vector + do i = 1, n + rv = iand (rv, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(iand:result) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(iand:rc) gang worker vector do i = 1, n - result = iand (result, array(i)) + rc = iand (rc, array(i)) end do !$acc end parallel @@ -107,17 +250,46 @@ program reduction_1 vresult = iand (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! 'ior' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(ior:rg) gang + do i = 1, n + rg = ior (rg, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(ior:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(ior:rw) worker do i = 1, n - result = ior (result, array(i)) + rw = ior (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(ior:rv) gang + do i = 1, n + rv = ior (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(ior:rc) gang worker vector + do i = 1, n + rc = ior (rc, array(i)) end do !$acc end parallel @@ -126,17 +298,46 @@ program reduction_1 vresult = ior (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort - result = 0 + ! + ! 'ieor' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 'ieor' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(ieor:rg) gang + do i = 1, n + rg = ieor (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(ieor:rw) worker + do i = 1, n + rw = ieor (rw, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(ieor:result) + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(ieor:rv) vector do i = 1, n - result = ieor (result, array(i)) + rv = ieor (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(ieor:rc) gang worker vector + do i = 1, n + rc = ieor (rc, array(i)) end do !$acc end parallel @@ -145,17 +346,46 @@ program reduction_1 vresult = ieor (vresult, array(i)) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + ! ! '.and.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker + do i = 1, n + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.and.:lresult) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrc = lrc .and. (array(i) .ge. 5) end do !$acc end parallel @@ -164,17 +394,46 @@ program reduction_1 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + + ! + ! '.or.' reductions + ! - lresult = .false. + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector + do i = 1, n + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.or.:lresult) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -183,17 +442,46 @@ program reduction_1 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker + do i = 1, n + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.eqv.:lresult) + !$acc parallel num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -202,17 +490,46 @@ program reduction_1 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.neqv.:lresult) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 5) end do !$acc end parallel @@ -221,5 +538,8 @@ program reduction_1 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_1 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 index 96955ce..b828feb 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-2.f90 @@ -5,26 +5,52 @@ program reduction_2 implicit none - integer, parameter :: n = 10, vl = 32 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - real, parameter :: e = .001 - real :: vresult, result - logical :: lresult, lvresult - real, dimension (n) :: array + real :: vresult, rg, rw, rv, rc + real, parameter :: e = 0.001 + logical :: lrg, lrw, lrv, lrc, lvresult + real, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! '+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(+:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -33,17 +59,46 @@ program reduction_2 vresult = vresult + array(i) end do - if (abs (result - vresult) .ge. e) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! '*' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! '*' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * array(i) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(*:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker do i = 1, n - result = result * array(i) + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -52,17 +107,46 @@ program reduction_2 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rg = 0 + rc = 0 vresult = 0 - ! 'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(max:result) + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector do i = 1, n - result = max (result, array(i)) + rv = max (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector + do i = 1, n + rc = max (rc, array(i)) end do !$acc end parallel @@ -71,17 +155,46 @@ program reduction_2 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! 'min' reductions + ! - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(min:result) + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker + do i = 1, n + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector do i = 1, n - result = min (result, array(i)) + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -90,17 +203,46 @@ program reduction_2 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! '.and.' reductions + ! - lresult = .true. + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. lvresult = .true. - ! '.and.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.and.:lresult) + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector + do i = 1, n + lrc = lrc .and. (array(i) .ge. 5) end do !$acc end parallel @@ -109,17 +251,46 @@ program reduction_2 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + + ! + ! '.or.' reductions + ! - lresult = .false. + lrg = .false. + lrw = .false. + lrv = .false. + lrc = .false. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.or.:lresult) + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector + do i = 1, n + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -128,17 +299,46 @@ program reduction_2 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.eqv.:lresult) + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector + do i = 1, n + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -147,17 +347,46 @@ program reduction_2 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.neqv.:lresult) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 5) end do !$acc end parallel @@ -166,5 +395,8 @@ program reduction_2 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_2 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 index ecf7fbe..3d8d753 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-3.f90 @@ -5,26 +5,52 @@ program reduction_3 implicit none - integer, parameter :: n = 10, vl = 32 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - double precision, parameter :: e = .001 - double precision :: vresult, result - logical :: lresult, lvresult + double precision :: vresult, rg, rw, rv, rc + double precision, parameter :: e = 0.001 + logical :: lrg, lrw, lrv, lrc, lvresult double precision, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! '+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + array(i) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(+:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker do i = 1, n - result = result + array(i) + rw = rw + array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector + do i = 1, n + rc = rc + array(i) end do !$acc end parallel @@ -33,17 +59,46 @@ program reduction_3 vresult = vresult + array(i) end do - if (abs (result - vresult) .ge. e) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! '*' reductions + ! - result = 1 + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 - ! '*' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * array(i) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(*:result) + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker do i = 1, n - result = result * array(i) + rw = rw * array(i) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * array(i) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * array(i) end do !$acc end parallel @@ -52,17 +107,46 @@ program reduction_3 vresult = vresult * array(i) end do - if (result.ne.vresult) call abort + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + + ! + ! 'max' reductions + ! - result = 0 + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! 'max' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(max:rg) gang + do i = 1, n + rg = max (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(max:rw) worker + do i = 1, n + rw = max (rw, array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(max:result) + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(max:rv) vector do i = 1, n - result = max (result, array(i)) + rv = max (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(max:rc) gang worker vector + do i = 1, n + rc = max (rc, array(i)) end do !$acc end parallel @@ -71,17 +155,46 @@ program reduction_3 vresult = max (vresult, array(i)) end do - if (result.ne.vresult) call abort - - result = 1 - vresult = 1 + if (abs (rg - vresult) .ge. e) call abort + if (abs (rw - vresult) .ge. e) call abort + if (abs (rv - vresult) .ge. e) call abort + if (abs (rc - vresult) .ge. e) call abort + ! ! 'min' reductions + ! - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(min:result) + rg = 0 + rw = 0 + rv = 0 + rc = 0 + vresult = 0 + + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(min:rg) gang + do i = 1, n + rg = min (rg, array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(min:rw) worker + do i = 1, n + rw = min (rw, array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(min:rv) vector do i = 1, n - result = min (result, array(i)) + rv = min (rv, array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(min:rc) gang worker vector + do i = 1, n + rc = min (rc, array(i)) end do !$acc end parallel @@ -90,17 +203,46 @@ program reduction_3 vresult = min (vresult, array(i)) end do - if (result.ne.vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort + + ! + ! '.and.' reductions + ! - lresult = .true. + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. lvresult = .true. - ! '.and.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.and.:lrg) gang + do i = 1, n + lrg = lrg .and. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.and.:lresult) + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.and.:lrw) worker do i = 1, n - lresult = lresult .and. (array(i) .ge. 5) + lrw = lrw .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.and.:lrv) vector + do i = 1, n + lrv = lrv .and. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.and.:lrc) gang worker vector + do i = 1, n + lrc = lrc .and. (array(i) .ge. 5) end do !$acc end parallel @@ -109,17 +251,46 @@ program reduction_3 lvresult = lvresult .and. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + + ! + ! '.or.' reductions + ! - lresult = .false. + lrg = .false. + lrw = .false. + lrv = .false. + lrc = .false. lvresult = .false. - ! '.or.' reductions + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.or.:lrg) gang + do i = 1, n + lrg = lrg .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.or.:lrw) worker + do i = 1, n + lrw = lrw .or. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.or.:lresult) + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.or.:lrv) vector do i = 1, n - lresult = lresult .or. (array(i) .ge. 5) + lrv = lrv .or. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.or.:lrc) gang worker vector + do i = 1, n + lrc = lrc .or. (array(i) .ge. 5) end do !$acc end parallel @@ -128,17 +299,46 @@ program reduction_3 lvresult = lvresult .or. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.eqv.' reductions + ! - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.eqv.:lresult) + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.eqv.:lrg) gang + do i = 1, n + lrg = lrg .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.eqv.:lrw) worker do i = 1, n - lresult = lresult .eqv. (array(i) .ge. 5) + lrw = lrw .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.eqv.:lrv) vector + do i = 1, n + lrv = lrv .eqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.eqv.:lrc) gang worker vector + do i = 1, n + lrc = lrc .eqv. (array(i) .ge. 5) end do !$acc end parallel @@ -147,17 +347,46 @@ program reduction_3 lvresult = lvresult .eqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort - - lresult = .false. - lvresult = .false. + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort + ! ! '.neqv.' reductions + ! + + lrg = .true. + lrw = .true. + lrv = .true. + lrc = .true. + lvresult = .true. + + !$acc parallel num_gangs(ng) copy(lrg) + !$acc loop reduction(.neqv.:lrg) gang + do i = 1, n + lrg = lrg .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(lrw) + !$acc loop reduction(.neqv.:lrw) worker + do i = 1, n + lrw = lrw .neqv. (array(i) .ge. 5) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(lrv) + !$acc loop reduction(.neqv.:lrv) vector + do i = 1, n + lrv = lrv .neqv. (array(i) .ge. 5) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(lresult) - !$acc loop reduction(.neqv.:lresult) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(lrc) + !$acc loop reduction(.neqv.:lrc) gang worker vector do i = 1, n - lresult = lresult .neqv. (array(i) .ge. 5) + lrc = lrc .neqv. (array(i) .ge. 5) end do !$acc end parallel @@ -166,5 +395,8 @@ program reduction_3 lvresult = lvresult .neqv. (array(i) .ge. 5) end do - if (result.ne.vresult) call abort + if (lrg .neqv. lvresult) call abort + if (lrw .neqv. lvresult) call abort + if (lrv .neqv. lvresult) call abort + if (lrc .neqv. lvresult) call abort end program reduction_3 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 index 8c99fdb..c3bdaf6 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-4.f90 @@ -5,50 +5,108 @@ program reduction_4 implicit none - integer, parameter :: n = 10, vl = 32 + integer, parameter :: n = 10, ng = 8, nw = 4, vl = 32 integer :: i - complex :: vresult, result + real :: vresult, rg, rw, rv, rc complex, dimension (n) :: array do i = 1, n array(i) = i end do - result = 0 + ! + ! '+' reductions + ! + + rg = 0 + rw = 0 + rv = 0 + rc = 0 vresult = 0 - ! '+' reductions + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(+:rg) gang + do i = 1, n + rg = rg + REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(+:rw) worker + do i = 1, n + rw = rw + REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(+:rv) vector + do i = 1, n + rv = rv + REAL(array(i)) + end do + !$acc end parallel - !$acc parallel vector_length(vl) num_gangs(1) copy(result) - !$acc loop reduction(+:result) + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(+:rc) gang worker vector do i = 1, n - result = result + array(i) + rc = rc + REAL(array(i)) end do !$acc end parallel ! Verify the results do i = 1, n - vresult = vresult + array(i) + vresult = vresult + REAL(array(i)) end do - if (result .ne. vresult) call abort + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort - result = 1 + ! + ! '*' reductions + ! + + rg = 1 + rw = 1 + rv = 1 + rc = 1 vresult = 1 -! ! '*' reductions -! -! !$acc parallel vector_length(vl) -! !$acc loop reduction(*:result) -! do i = 1, n -! result = result * array(i) -! end do -! !$acc end parallel -! -! ! Verify the results -! do i = 1, n -! vresult = vresult * array(i) -! end do -! -! if (result.ne.vresult) call abort + !$acc parallel num_gangs(ng) copy(rg) + !$acc loop reduction(*:rg) gang + do i = 1, n + rg = rg * REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel num_workers(nw) copy(rw) + !$acc loop reduction(*:rw) worker + do i = 1, n + rw = rw * REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel vector_length(vl) copy(rv) + !$acc loop reduction(*:rv) vector + do i = 1, n + rv = rv * REAL(array(i)) + end do + !$acc end parallel + + !$acc parallel num_gangs(ng) num_workers(nw) vector_length(vl) copy(rc) + !$acc loop reduction(*:rc) gang worker vector + do i = 1, n + rc = rc * REAL(array(i)) + end do + !$acc end parallel + + ! Verify the results + do i = 1, n + vresult = vresult * REAL(array(i)) + end do + + if (rg .ne. vresult) call abort + if (rw .ne. vresult) call abort + if (rv .ne. vresult) call abort + if (rc .ne. vresult) call abort end program reduction_4 diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 index ec13e4e..4210648 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90 @@ -1,12 +1,17 @@ ! { dg-do run } +! { dg-additional-options "-w" } ! subroutine reduction program reduction integer, parameter :: n = 40, c = 10 - integer :: i, vsum, sum + integer :: i, vsum, gs, ws, vs, cs, ns - call redsub (sum, n, c) + call redsub_gang (gs, n, c) + call redsub_worker (ws, n, c) + call redsub_vector (vs, n, c) + call redsub_combined (cs, n, c) + call redsub_nested (ns, n, c) vsum = 0 @@ -15,21 +20,80 @@ program reduction vsum = vsum + c end do - if (sum.ne.vsum) call abort () + if (gs .ne. vsum) call abort () + if (ws .ne. vsum) call abort () + if (vs .ne. vsum) call abort () + if (cs .ne. vsum) call abort () + if (ns .ne. vsum) call abort () end program reduction -subroutine redsub(sum, n, c) +subroutine redsub_gang(sum, n, c) integer :: sum, n, c - integer :: s - s = 0 + sum = 0 - !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1) - !$acc loop reduction(+:s) + !$acc parallel copyin (n, c) num_gangs(n) copy(sum) + !$acc loop reduction(+:sum) gang do i = 1, n - s = s + c + sum = sum + c end do !$acc end parallel +end subroutine redsub_gang - sum = s -end subroutine redsub +subroutine redsub_worker(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel copyin (n, c) num_workers(4) vector_length (32) copy(sum) + !$acc loop reduction(+:sum) worker + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_worker + +subroutine redsub_vector(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel copyin (n, c) vector_length(32) copy(sum) + !$acc loop reduction(+:sum) vector + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_vector + +subroutine redsub_combined(sum, n, c) + integer :: sum, n, c + + sum = 0 + + !$acc parallel num_gangs (8) num_workers (4) vector_length(32) copy(sum) + !$acc loop reduction(+:sum) gang worker vector + do i = 1, n + sum = sum + c + end do + !$acc end parallel +end subroutine redsub_combined + +subroutine redsub_nested(sum, n, c) + integer :: sum, n, c + integer :: ii, jj + + ii = n / 10; + jj = 10; + sum = 0 + + !$acc parallel num_gangs (8) copy(sum) + !$acc loop reduction(+:sum) gang + do i = 1, ii + !$acc loop reduction(+:sum) vector + do j = 1, jj + sum = sum + c + end do + end do + !$acc end parallel +end subroutine redsub_nested diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 index 2ff6f5f..f3ed275 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-6.f90 @@ -1,30 +1,94 @@ ! { dg-do run } +! { dg-additional-options "-cpp -w" } program reduction implicit none - integer, parameter :: n = 100 - integer :: i, s1, s2, vs1, vs2 + integer, parameter :: n = 100, n2 = 1000, chunksize = 10 + integer :: i, gs1, gs2, ws1, ws2, vs1, vs2, cs1, cs2, hs1, hs2 + integer :: j, red, vred - s1 = 0 - s2 = 0 + gs1 = 0 + gs2 = 0 + ws1 = 0 + ws2 = 0 vs1 = 0 vs2 = 0 + cs1 = 0 + cs2 = 0 + hs1 = 0 + hs2 = 0 - !$acc parallel vector_length (32) copy(s1, s2) - !$acc loop reduction(+:s1, s2) + !$acc parallel num_gangs (1000) copy(gs1, gs2) + !$acc loop reduction(+:gs1, gs2) gang do i = 1, n - s1 = s1 + 1 - s2 = s2 + 2 + gs1 = gs1 + 1 + gs2 = gs2 + 2 end do !$acc end parallel - ! Verify the results + !$acc parallel num_workers (4) vector_length (32) copy(ws1, ws2) + !$acc loop reduction(+:ws1, ws2) worker + do i = 1, n + ws1 = ws1 + 1 + ws2 = ws2 + 2 + end do + !$acc end parallel + + !$acc parallel vector_length (32) copy(vs1, vs2) + !$acc loop reduction(+:vs1, vs2) vector do i = 1, n vs1 = vs1 + 1 vs2 = vs2 + 2 end do + !$acc end parallel + + !$acc parallel num_gangs(8) num_workers(4) vector_length(32) copy(cs1, cs2) + !$acc loop reduction(+:cs1, cs2) gang worker vector + do i = 1, n + cs1 = cs1 + 1 + cs2 = cs2 + 2 + end do + !$acc end parallel + + ! Verify the results on the host + do i = 1, n + hs1 = hs1 + 1 + hs2 = hs2 + 2 + end do + + if (gs1 .ne. hs1) call abort () + if (gs2 .ne. hs2) call abort () + + if (ws1 .ne. hs1) call abort () + if (ws2 .ne. hs2) call abort () + + if (vs1 .ne. hs1) call abort () + if (vs2 .ne. hs2) call abort () + + if (cs1 .ne. hs1) call abort () + if (cs2 .ne. hs2) call abort () + + ! Nested reductions. + + red = 0 + vred = 0 + + !$acc parallel num_gangs(10) vector_length(32) copy(red) + !$acc loop reduction(+:red) gang + do i = 1, n/chunksize + !$acc loop reduction(+:red) vector + do j = 1, chunksize + red = red + chunksize + end do + end do + !$acc end parallel + + do i = 1, n/chunksize + do j = 1, chunksize + vred = vred + chunksize + end do + end do - if (s1.ne.vs1) call abort () - if (s2.ne.vs2) call abort () + if (red .ne. vred) call abort () end program reduction diff --git a/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 new file mode 100644 index 0000000..8ec36ad --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/reduction-7.f90 @@ -0,0 +1,88 @@ +! { dg-do run } +! { dg-additional-options "-w" } + +! subroutine reduction with private and firstprivate variables + +program reduction + integer, parameter :: n = 100 + integer :: i, j, vsum, cs, arr(n) + + call redsub_private (cs, n, arr) + call redsub_bogus (cs, n) + call redsub_combined (cs, n, arr) + + vsum = 0 + + ! Verify the results + do i = 1, n + vsum = i + do j = 1, n + vsum = vsum + 1; + end do + if (vsum .ne. arr(i)) call abort () + end do +end program reduction + +! This subroutine tests a reduction with an explicit private variable. + +subroutine redsub_private(sum, n, arr) + integer :: sum, n, arr(n) + integer :: i, j, v + + !$acc parallel copyout (arr) + !$acc loop gang private (v) + do j = 1, n + v = j + + !$acc loop vector reduction (+:v) + do i = 1, 100 + v = v + 1 + end do + + arr(j) = v + end do + !$acc end parallel + + ! verify the results + do i = 1, 10 + if (arr(i) .ne. 100+i) call abort () + end do +end subroutine redsub_private + + +! Bogus reduction on an impliclitly firstprivate variable. The results do +! survive the parallel region. The goal here is to ensure that gfortran +! doesn't ICE. + +subroutine redsub_bogus(sum, n) + integer :: sum, n, arr(n) + integer :: i + + !$acc parallel + !$acc loop gang worker vector reduction (+:sum) + do i = 1, n + sum = sum + 1 + end do + !$acc end parallel +end subroutine redsub_bogus + +! This reduction involving a firstprivate variable yields legitimate results. + +subroutine redsub_combined(sum, n, arr) + integer :: sum, n, arr(n) + integer :: i, j + + !$acc parallel copy (arr) + !$acc loop gang + do i = 1, n + sum = i; + + !$acc loop reduction(+:sum) + do j = 1, n + sum = sum + 1 + end do + + arr(i) = sum + end do + !$acc end parallel +end subroutine redsub_combined |