diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2019-11-14 16:16:04 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2020-01-20 16:51:06 +0000 |
commit | 09e0ad6253f4330977e1b2f116b5e289dc2c2a02 (patch) | |
tree | 2edb48c4818ee2132b970ce8ddb86c57198209b9 /libgomp | |
parent | 3a43459715e239fb8043bf64b830aaf1a9802180 (diff) | |
download | gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.zip gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.tar.gz gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.tar.bz2 |
Update OpenACC tests for amdgcn
2020-01-20 Andrew Stubbs <ams@codesourcery.com>
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main):
Adjust test dimensions for amdgcn.
* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust
gang/worker/vector expectations dynamically.
* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
(main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
(acc_gang): Recognise acc_device_radeon.
(acc_worker): Likewise.
(acc_vector): Likewise.
(main): Set expectations for amdgcn.
* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
(main): Adjust gang/worker/vector expectations dynamically.
* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations
for amdgcn.
Diffstat (limited to 'libgomp')
19 files changed, 180 insertions, 45 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index 24cbe04..fa6aeed 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,33 @@ +2020-01-20 Andrew Stubbs <ams@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn. + * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main): + Adjust test dimensions for amdgcn. + * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust + gang/worker/vector expectations dynamically. + * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c + (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c + (acc_gang): Recognise acc_device_radeon. + (acc_worker): Likewise. + (acc_vector): Likewise. + (main): Set expectations for amdgcn. + * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c + (main): Adjust gang/worker/vector expectations dynamically. + * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise. + * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations + for amdgcn. + 2020-01-17 Andrew Stubbs <ams@codesourcery.com> * config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 34bc57e..0c9ae95 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -1,3 +1,6 @@ +/* AMD GCN does not use 32-lane vectors. + { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */ + /* { dg-additional-options "-fopenacc-dim=32" } */ #include <stdio.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c index 04387d3..30f0539 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c @@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp) int main () { +#ifdef ACC_DEVICE_TYPE_gcn + /* AMD GCN uses the autovectorizer for the vector dimension: the use + of a function call in vector-partitioned code in this test is not + currently supported. */ + /* AMD GCN does not currently support multiple workers. This should be + set to 16 when that changes. */ + return test_1 (16, 1, 1); +#else return test_1 (16, 16, 32); +#endif } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 766e578..5c84301 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int gangsize, workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) { #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) @@ -32,6 +34,10 @@ int main () else ary[ix] = ix; } + + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -39,11 +45,12 @@ int main () int expected = ix; if(ondev) { - int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * workersize * vectorsize); + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 0bec6e1..9c4a85f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; - -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev) + int gangsize, workersize, vectorsize; + +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \ + copy(ondev) copyout(gangsize, workersize, vectorsize) { #pragma acc loop gang worker vector reduction(+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,18 +30,22 @@ int main () } t += val; } + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) { int val = ix; - if(ondev) + if (ondev) { - int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * vectorsize * workersize); + int w = ix / vectorsize % workersize; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index da4921d..1173c1f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -9,8 +9,9 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int vectorsize; -#pragma acc parallel vector_length(32) copy(ondev) +#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize) { #pragma acc loop vector reduction (+:t) for (unsigned ix = 0; ix < N; ix++) @@ -29,6 +30,7 @@ int main () } t += val; } + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -38,7 +40,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 15e2bc2..84c2296 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -9,8 +9,9 @@ int main () int ix; int ondev = 0; int q = 0, h = 0; + int vectorsize; -#pragma acc parallel vector_length(32) copy(q) copy(ondev) +#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize) { int t = q; @@ -32,6 +33,7 @@ int main () t += val; } q = t; + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -41,7 +43,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 6bbd04f..648f89e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ + copyout(workersize) { #pragma acc loop worker reduction(+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,6 +30,7 @@ int main () } t += val; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -36,7 +39,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; val = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index c63a5d4..f9fcf37 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int q = 0, h = 0; + int workersize; -#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \ + copyout(workersize) { int t = q; @@ -31,6 +33,7 @@ int main () t += val; } q = t; + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -39,7 +42,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; val = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index 71d3969..c360ad1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -8,8 +8,10 @@ int main () int ix; int ondev = 0; int t = 0, h = 0; + int workersize, vectorsize; -#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \ + copyout(workersize, vectorsize) { #pragma acc loop worker vector reduction (+:t) for (unsigned ix = 0; ix < N; ix++) @@ -28,6 +30,8 @@ int main () } t += val; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -36,8 +40,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; val = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 6010cd2..8c858f3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ + copyout(vectorsize) { #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,7 @@ int main () else ary[ix] = ix; } + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -40,7 +43,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index fa6fb91..5fe486f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize) { #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,7 @@ int main () else ary[ix] = ix; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -39,7 +42,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; expected = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index cd4cc99..fd4e4cf 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -9,11 +9,13 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize, vectorsize) { #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) @@ -31,6 +33,8 @@ int main () else ary[ix] = ix; } + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -39,8 +43,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index a5edfc6..cc4c738 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () { if (acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); else __builtin_abort (); @@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () { if (acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); else __builtin_abort (); @@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () { if (acc_on_device ((int) acc_device_host)) return 0; - else if (acc_on_device ((int) acc_device_nvidia)) + else if (acc_on_device ((int) acc_device_nvidia) + || acc_on_device ((int) acc_device_radeon)) return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); else __builtin_abort (); @@ -282,6 +285,12 @@ int main () /* The GCC nvptx back end enforces num_workers (32). */ workers_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end is limited to num_workers (16). + Temporarily set this to 1 until multiple workers are permitted. */ + workers_actual = 1; // 16; + } else __builtin_abort (); #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -328,6 +337,11 @@ int main () /* We're actually executing with num_workers (32). */ /* workers_actual = 32; */ } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end is limited to num_workers (16). */ + workers_actual = 16; + } else __builtin_abort (); #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -367,6 +381,11 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 1024; } + else if (acc_on_device (acc_device_radeon)) + { + /* The GCC GCN back end enforces vector_length (1): autovectorize. */ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -407,6 +426,13 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* Because of the way vectors are implemented for GCN, a vector loop + containing a seq routine call will not vectorize calls to that + routine. Hence, we'll only get one "vector". */ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) @@ -433,6 +459,9 @@ int main () in the following case. So, limit ourselves here. */ if (acc_get_device_type () == acc_device_nvidia) gangs = 3; + /* Similar appears to be true for GCN. */ + if (acc_get_device_type () == acc_device_radeon) + gangs = 3; int gangs_actual = gangs; #define WORKERS 3 int workers_actual = WORKERS; @@ -459,6 +488,13 @@ int main () /* The GCC nvptx back end enforces vector_length (32). */ vectors_actual = 32; } + else if (acc_on_device (acc_device_radeon)) + { + /* Temporary setting, until multiple workers are permitted. */ + workers_actual = 1; + /* See above comments about GCN vectors_actual. */ + vectors_actual = 1; + } else __builtin_abort (); #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c index a97e046..da13d84 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c @@ -30,14 +30,18 @@ int main () int ix; int exit = 0; int ondev = 0; + int gangsize, workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize) { ondev = acc_on_device (acc_device_not_host); gang (ary); + gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -45,11 +49,12 @@ int main () int expected = ix; if(ondev) { - int chunk_size = (N + 32*32*32 - 1) / (32*32*32); + int chunk_size = (N + gangsize * workersize * vectorsize - 1) + / (gangsize * workersize * vectorsize); - int g = ix / (chunk_size * 32 * 32); - int w = ix / 32 % 32; - int v = ix % 32; + int g = ix / (chunk_size * vectorsize * workersize); + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c index b1e3e3a..dd7bb6c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c @@ -30,14 +30,17 @@ int main () int ix; int exit = 0; int ondev = 0; + int vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \ + copyout(vectorsize) { ondev = acc_on_device (acc_device_not_host); vector (ary); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -47,7 +50,7 @@ int main () { int g = 0; int w = 0; - int v = ix % 32; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index 81f1e03..acd9884 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -30,14 +30,17 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize) { ondev = acc_on_device (acc_device_not_host); worker (ary); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); } for (ix = 0; ix < N; ix++) @@ -46,7 +49,7 @@ int main () if(ondev) { int g = 0; - int w = ix % 32; + int w = ix % workersize; int v = 0; expected = (g << 16) | (w << 8) | v; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c index 23dbc1a..73696e4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c @@ -30,14 +30,18 @@ int main () int ix; int exit = 0; int ondev = 0; + int workersize, vectorsize; for (ix = 0; ix < N;ix++) ary[ix] = -1; -#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) +#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \ + copyout(workersize, vectorsize) { ondev = acc_on_device (acc_device_not_host); worker (ary); + workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); } for (ix = 0; ix < N; ix++) @@ -46,8 +50,8 @@ int main () if(ondev) { int g = 0; - int w = (ix / 32) % 32; - int v = ix % 32; + int w = (ix / vectorsize) % workersize; + int v = ix % vectorsize; expected = (g << 16) | (w << 8) | v; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c index 8862148..609f9f6 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c @@ -2,8 +2,13 @@ #include <openacc.h> #include <gomp-constants.h> +#ifdef ACC_DEVICE_TYPE_gcn +#define NUM_WORKERS 16 +#define NUM_VECTORS 1 +#else #define NUM_WORKERS 16 #define NUM_VECTORS 32 +#endif #define WIDTH 64 #define HEIGHT 32 |