diff options
author | Cesar Philippidis <cesar@codesourcery.com> | 2018-06-22 03:04:14 -0700 |
---|---|---|
committer | Thomas Schwinge <tschwinge@gcc.gnu.org> | 2018-06-22 12:04:14 +0200 |
commit | 31dd69b7ff60979b615e45229f759613873989e6 (patch) | |
tree | d3f7ad7604a2aa1dc7c92c08dba3aa31c5e791e7 /gcc | |
parent | ebbb116851bd0b43e6c86cd719b7a70684991d66 (diff) | |
download | gcc-31dd69b7ff60979b615e45229f759613873989e6.zip gcc-31dd69b7ff60979b615e45229f759613873989e6.tar.gz gcc-31dd69b7ff60979b615e45229f759613873989e6.tar.bz2 |
Update OpenACC testcases
gcc/testsuite/
* c-c++-common/goacc/deviceptr-4.c: New file.
* c-c++-common/goacc/kernels-counter-var-redundant-load.c:
Likewise.
* c-c++-common/goacc/kernels-loop-data-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise.
* c-c++-common/goacc/kernels-loop-data-update.c: Likewise.
* c-c++-common/goacc/kernels-loop-data.c: Likewise.
* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c:
Likewise.
* c-c++-common/goacc/parallel-reduction.c: Likewise.
* c-c++-common/goacc/private-reduction-1.c: Likewise.
* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
Likewise.
* gfortran.dg/goacc/modules.f95: Likewise.
* gfortran.dg/goacc/routine-8.f90: Likewise.
* gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Don't force "-O2".
* testsuite/libgomp.oacc-c-c++-common/data-2.c: Update.
* testsuite/libgomp.oacc-c-c++-common/host_data-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/mode-transitions.c: Likewise.
* testsuite/libgomp.oacc-fortran/data-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/data-2.f90: Likewise.
* testsuite/libgomp.oacc-c++/non-scalar-data.C: New file.
* testsuite/libgomp.oacc-c-c++-common/declare-3.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/enter-data.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Likewise.
* testsuite/libgomp.oacc-fortran/cublas-fixed.h: Likewise.
* testsuite/libgomp.oacc-fortran/dummy-array.f90: Likewise.
* testsuite/libgomp.oacc-fortran/host_data-2.f90: Likewise.
* testsuite/libgomp.oacc-fortran/host_data-3.f: Likewise.
* testsuite/libgomp.oacc-fortran/host_data-4.f90: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction-2.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-acc-loop-reduction.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-independent.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-loop-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-map-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90:
Likewise.
* testsuite/libgomp.oacc-fortran/lib-12.f90: Likewise.
* testsuite/libgomp.oacc-fortran/lib-13.f90: Likewise.
* testsuite/libgomp.oacc-fortran/lib-14.f90: Likewise.
* testsuite/libgomp.oacc-fortran/lib-15.f90: Likewise.
* testsuite/libgomp.oacc-fortran/parallel-loop-1.f90: Likewise.
* testsuite/libgomp.oacc-fortran/reference-reductions.f90: Likewise.
* testsuite/libgomp.oacc-fortran/vector-routine.f90: Likewise.
Co-Authored-By: James Norris <jnorris@codesourcery.com>
Co-Authored-By: Julian Brown <julian@codesourcery.com>
Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
Co-Authored-By: Tom de Vries <tom@codesourcery.com>
From-SVN: r261884
Diffstat (limited to 'gcc')
15 files changed, 692 insertions, 0 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index bb7aa60..655a440 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,26 @@ +2018-06-22 Cesar Philippidis <cesar@codesourcery.com> + James Norris <jnorris@codesourcery.com> + Thomas Schwinge <thomas@codesourcery.com> + Tom de Vries <tom@codesourcery.com> + + * c-c++-common/goacc/deviceptr-4.c: New file. + * c-c++-common/goacc/kernels-counter-var-redundant-load.c: + Likewise. + * c-c++-common/goacc/kernels-loop-data-2.c: Likewise. + * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Likewise. + * c-c++-common/goacc/kernels-loop-data-enter-exit.c: Likewise. + * c-c++-common/goacc/kernels-loop-data-update.c: Likewise. + * c-c++-common/goacc/kernels-loop-data.c: Likewise. + * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: + Likewise. + * c-c++-common/goacc/parallel-reduction.c: Likewise. + * c-c++-common/goacc/private-reduction-1.c: Likewise. + * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: + Likewise. + * gfortran.dg/goacc/modules.f95: Likewise. + * gfortran.dg/goacc/routine-8.f90: Likewise. + * gfortran.dg/goacc/routine-level-of-parallelism-1.f90: Likewise. 
+ 2018-06-21 Michael Meissner <meissner@linux.ibm.com> * gcc.target/powerpc/pack02.c: Use __ibm128 instead of long double diff --git a/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c b/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c new file mode 100644 index 0000000..db1b916 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/deviceptr-4.c @@ -0,0 +1,11 @@ +/* { dg-additional-options "-fdump-tree-gimple" } */ + +void +subr (int *a) +{ +#pragma acc data deviceptr (a) +#pragma acc parallel + a[0] += 1.0; +} + +/* { dg-final { scan-tree-dump-times "#pragma omp target oacc_parallel.*map\\(tofrom:a" 1 "gimple" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c new file mode 100644 index 0000000..0304254 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c @@ -0,0 +1,34 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-dom3" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +COUNTERTYPE +foo (unsigned int *c) +{ + COUNTERTYPE ii; + +#pragma acc kernels copyout (c[0:N]) + { + for (ii = 0; ii < N; ii++) + c[ii] = 1; + } + + return ii; +} + +/* We're expecting: + + .omp_data_i_10 = &.omp_data_arr.3; + _11 = .omp_data_i_10->ii; + *_11 = 0; + _15 = .omp_data_i_10->c; + c.1_16 = *_15; + + Check that there's only one load from anonymous ssa-name (which we assume to + be the one to read c), and that there's no such load for ii. 
*/ + +/* { dg-final { scan-tree-dump-times "(?n)\\*_\[0-9\]\[0-9\]*;$" 1 "dom3" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c new file mode 100644 index 0000000..7180021 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c @@ -0,0 +1,68 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + } + +#pragma acc data copyout (b[0:N]) + { +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + } + +#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. 
*/ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c new file mode 100644 index 0000000..0c9f833 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c @@ -0,0 +1,66 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N]) +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } +#pragma acc exit data copyout (a[0:N]) + +#pragma acc enter data create (b[0:N]) +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } +#pragma acc exit data copyout (b[0:N]) + + +#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N]) +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } +#pragma acc exit data copyout (c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. 
*/ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c new file mode 100644 index 0000000..0bd21b6 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c @@ -0,0 +1,63 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != 
a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c new file mode 100644 index 0000000..dd5a841 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c @@ -0,0 +1,63 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc update device (b[0:N]) + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 
0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only two loops are analyzed, and that both can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c new file mode 100644 index 0000000..a658182 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c @@ -0,0 +1,62 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N], b[0:N], c[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], 
c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 3 "parloops1" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c b/gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c new file mode 100644 index 0000000..81b0fee --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c @@ -0,0 +1,66 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-parloops1-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + 
+#pragma acc parallel present (b[0:N]) + { +#pragma acc loop + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only two loops are analyzed, and that both can be + parallelized. */ +// FIXME: OpenACC kernels stopped working with the firstprivate subarray +// changes. +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } */ + +/* Check that the loop has been split off into a function. 
*/ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c b/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c new file mode 100644 index 0000000..d7cc947 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/parallel-reduction.c @@ -0,0 +1,17 @@ +int +main () +{ + int sum = 0; + int dummy = 0; + +#pragma acc data copy (dummy) + { +#pragma acc parallel num_gangs (10) copy (sum) reduction (+:sum) + { + int v = 5; + sum += 10 + v; + } + } + + return sum; +} diff --git a/gcc/testsuite/c-c++-common/goacc/private-reduction-1.c b/gcc/testsuite/c-c++-common/goacc/private-reduction-1.c new file mode 100644 index 0000000..d4e3995 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/private-reduction-1.c @@ -0,0 +1,12 @@ +int +reduction () +{ + int i, r; + + #pragma acc parallel + #pragma acc loop private (r) reduction (+:r) + for (i = 0; i < 100; i++) + r += 10; + + return r; +} diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 new file mode 100644 index 0000000..48c20b9 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 @@ -0,0 +1,48 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-fdump-tree-parloops1-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc parallel present (b(0:n-1)) + !$acc loop + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end parallel + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only two loops are analyzed, and that both can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops1" { xfail *-*-* } } } +! { dg-final { scan-tree-dump-times "(?n)__attribute__\\(\\(oacc kernels parallelized, oacc function \\(, , \\), oacc kernels, omp target entrypoint\\)\\)" 2 "parloops1" { xfail *-*-* } } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops1" { xfail *-*-* } } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } diff --git a/gcc/testsuite/gfortran.dg/goacc/modules.f95 b/gcc/testsuite/gfortran.dg/goacc/modules.f95 new file mode 100644 index 0000000..19a2abe --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/modules.f95 @@ -0,0 +1,55 @@ +!
{ dg-do compile } + +MODULE reduction_test + +CONTAINS + +SUBROUTINE reduction_kernel(x_min,x_max,y_min,y_max,arr,sum) + + IMPLICIT NONE + + INTEGER :: x_min,x_max,y_min,y_max + REAL(KIND=8), DIMENSION(x_min-2:x_max+2,y_min-2:y_max+2) :: arr + REAL(KIND=8) :: sum + + INTEGER :: j,k + + sum=0.0 + +!$ACC DATA PRESENT(arr) COPY(sum) +!$ACC PARALLEL LOOP REDUCTION(+ : sum) + DO k=y_min,y_max + DO j=x_min,x_max + sum=sum*arr(j,k) + ENDDO + ENDDO +!$ACC END PARALLEL LOOP +!$ACC END DATA + +END SUBROUTINE reduction_kernel + +END MODULE reduction_test + +program main + use reduction_test + + integer :: x_min,x_max,y_min,y_max + real(kind=8), dimension(1:10,1:10) :: arr + real(kind=8) :: sum + + x_min = 5 + x_max = 6 + y_min = 5 + y_max = 6 + + arr(:,:) = 1.0 + + sum = 1.0 + + !$acc data copy(arr) + + call field_summary_kernel(x_min,x_max,y_min,y_max,arr,sum) + + !$acc end data + +end program diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-8.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-8.f90 new file mode 100644 index 0000000..c903915 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/routine-8.f90 @@ -0,0 +1,32 @@ +! Test ACC ROUTINE inside an interface block. + +program main + interface + function s_1 (a) + integer a + !$acc routine + end function s_1 + end interface + + interface + function s_2 (a) + integer a + !$acc routine seq + end function s_2 + end interface + + interface + function s_3 (a) + integer a + !$acc routine (s_3) ! { dg-error "Only the ..ACC ROUTINE form without list is allowed in interface block" } + end function s_3 + end interface + + interface + function s_4 (a) + integer a + !$acc routine (s_4) seq ! 
{ dg-error "Only the ..ACC ROUTINE form without list is allowed in interface block" } + end function s_4 + end interface +end program main + diff --git a/gcc/testsuite/gfortran.dg/goacc/routine-level-of-parallelism-1.f90 b/gcc/testsuite/gfortran.dg/goacc/routine-level-of-parallelism-1.f90 new file mode 100644 index 0000000..75dd1b0 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/routine-level-of-parallelism-1.f90 @@ -0,0 +1,72 @@ +! Test various aspects of clauses specifying compatible levels of +! parallelism with the OpenACC routine directive. The C/C++ counterpart is +! c-c++-common/goacc/routine-level-of-parallelism-2.c + +subroutine g_1 + !$acc routine gang +end subroutine g_1 + +subroutine s_1_2a + !$acc routine +end subroutine s_1_2a + +subroutine s_1_2b + !$acc routine seq +end subroutine s_1_2b + +subroutine s_1_2c + !$acc routine (s_1_2c) +end subroutine s_1_2c + +subroutine s_1_2d + !$acc routine (s_1_2d) seq +end subroutine s_1_2d + +module s_2 +contains + subroutine s_2_1a + !$acc routine + end subroutine s_2_1a + + subroutine s_2_1b + !$acc routine seq + end subroutine s_2_1b + + subroutine s_2_1c + !$acc routine (s_2_1c) + end subroutine s_2_1c + + subroutine s_2_1d + !$acc routine (s_2_1d) seq + end subroutine s_2_1d +end module s_2 + +subroutine test + external g_1, w_1, v_1 + external s_1_1, s_1_2 + + interface + function s_3_1a (a) + integer a + !$acc routine + end function s_3_1a + end interface + + interface + function s_3_1b (a) + integer a + !$acc routine seq + end function s_3_1b + end interface + + !$acc routine(g_1) gang + + !$acc routine(w_1) worker + + !$acc routine(v_1) worker + + ! Also test the implicit seq clause. + + !$acc routine (s_1_1) seq + +end subroutine test |