aboutsummaryrefslogtreecommitdiff
path: root/libgomp
diff options
context:
space:
mode:
author: Andrew Stubbs <ams@codesourcery.com> 2019-11-14 16:16:04 +0000
committer: Andrew Stubbs <ams@codesourcery.com> 2020-01-20 16:51:06 +0000
commit: 09e0ad6253f4330977e1b2f116b5e289dc2c2a02 (patch)
tree: 2edb48c4818ee2132b970ce8ddb86c57198209b9 /libgomp
parent: 3a43459715e239fb8043bf64b830aaf1a9802180 (diff)
download: gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.zip
gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.tar.gz
gcc-09e0ad6253f4330977e1b2f116b5e289dc2c2a02.tar.bz2
Update OpenACC tests for amdgcn
2020-01-20 Andrew Stubbs <ams@codesourcery.com>

libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main): Adjust test dimensions for amdgcn.
* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically.
* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (acc_gang): Recognise acc_device_radeon.
(acc_worker): Likewise.
(acc_vector): Likewise.
(main): Set expectations for amdgcn.
* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically.
* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations for amdgcn.
Diffstat (limited to 'libgomp')
-rw-r--r-- libgomp/ChangeLog | 30
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c | 3
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c | 9
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c | 17
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c | 20
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c | 6
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c | 6
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c | 10
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c | 10
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c | 42
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c | 15
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c | 7
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c | 10
-rw-r--r-- libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c | 5
19 files changed, 180 insertions, 45 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 24cbe04..fa6aeed 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,33 @@
+2020-01-20 Andrew Stubbs <ams@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
+ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main):
+ Adjust test dimensions for amdgcn.
+ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust
+ gang/worker/vector expectations dynamically.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+ (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+ (acc_gang): Recognise acc_device_radeon.
+ (acc_worker): Likewise.
+ (acc_vector): Likewise.
+ (main): Set expectations for amdgcn.
+ * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+ (main): Adjust gang/worker/vector expectations dynamically.
+ * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
+ * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations
+ for amdgcn.
+
2020-01-17 Andrew Stubbs <ams@codesourcery.com>
* config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
index 34bc57e..0c9ae95 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
@@ -1,3 +1,6 @@
+/* AMD GCN does not use 32-lane vectors.
+ { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */
+
/* { dg-additional-options "-fopenacc-dim=32" } */
#include <stdio.h>
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
index 04387d3..30f0539 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp)
int main ()
{
+#ifdef ACC_DEVICE_TYPE_gcn
+ /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported. */
+ /* AMD GCN does not currently support multiple workers. This should be
+ set to 16 when that changes. */
+ return test_1 (16, 1, 1);
+#else
return test_1 (16, 16, 32);
+#endif
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index 766e578..5c84301 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int gangsize, workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
{
#pragma acc loop gang worker vector
for (unsigned ix = 0; ix < N; ix++)
@@ -32,6 +34,10 @@ int main ()
else
ary[ix] = ix;
}
+
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -39,11 +45,12 @@ int main ()
int expected = ix;
if(ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * workersize * vectorsize);
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
index 0bec6e1..9c4a85f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
-
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
+ int gangsize, workersize, vectorsize;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+ copy(ondev) copyout(gangsize, workersize, vectorsize)
{
#pragma acc loop gang worker vector reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,18 +30,22 @@ int main ()
}
t += val;
}
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
{
int val = ix;
- if(ondev)
+ if (ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * vectorsize * workersize);
+ int w = ix / vectorsize % workersize;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
index da4921d..1173c1f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -9,8 +9,9 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int vectorsize;
-#pragma acc parallel vector_length(32) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
{
#pragma acc loop vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -29,6 +30,7 @@ int main ()
}
t += val;
}
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -38,7 +40,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
index 15e2bc2..84c2296 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -9,8 +9,9 @@ int main ()
int ix;
int ondev = 0;
int q = 0, h = 0;
+ int vectorsize;
-#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
{
int t = q;
@@ -32,6 +33,7 @@ int main ()
t += val;
}
q = t;
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -41,7 +43,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
index 6bbd04f..648f89e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+ copyout(workersize)
{
#pragma acc loop worker reduction(+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,7 @@ int main ()
}
t += val;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -36,7 +39,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
index c63a5d4..f9fcf37 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int q = 0, h = 0;
+ int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
+ copyout(workersize)
{
int t = q;
@@ -31,6 +33,7 @@ int main ()
t += val;
}
q = t;
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
val = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
index 71d3969..c360ad1 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -8,8 +8,10 @@ int main ()
int ix;
int ondev = 0;
int t = 0, h = 0;
+ int workersize, vectorsize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+ copyout(workersize, vectorsize)
{
#pragma acc loop worker vector reduction (+:t)
for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,8 @@ int main ()
}
t += val;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -36,8 +40,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
val = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
index 6010cd2..8c858f3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+ copyout(vectorsize)
{
#pragma acc loop vector
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
else
ary[ix] = ix;
}
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -40,7 +43,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
index fa6fb91..5fe486f 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize)
{
#pragma acc loop worker
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
else
ary[ix] = ix;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
index cd4cc99..fd4e4cf 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -9,11 +9,13 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize, vectorsize)
{
#pragma acc loop worker vector
for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,8 @@ int main ()
else
ary[ix] = ix;
}
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -39,8 +43,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index a5edfc6..cc4c738 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
else
__builtin_abort ();
@@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
else
__builtin_abort ();
@@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
{
if (acc_on_device ((int) acc_device_host))
return 0;
- else if (acc_on_device ((int) acc_device_nvidia))
+ else if (acc_on_device ((int) acc_device_nvidia)
+ || acc_on_device ((int) acc_device_radeon))
return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
else
__builtin_abort ();
@@ -282,6 +285,12 @@ int main ()
/* The GCC nvptx back end enforces num_workers (32). */
workers_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end is limited to num_workers (16).
+ Temporarily set this to 1 until multiple workers are permitted. */
+ workers_actual = 1; // 16;
+ }
else
__builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -328,6 +337,11 @@ int main ()
/* We're actually executing with num_workers (32). */
/* workers_actual = 32; */
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end is limited to num_workers (16). */
+ workers_actual = 16;
+ }
else
__builtin_abort ();
#pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -367,6 +381,11 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 1024;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* The GCC GCN back end enforces vector_length (1): autovectorize. */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -407,6 +426,13 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* Because of the way vectors are implemented for GCN, a vector loop
+ containing a seq routine call will not vectorize calls to that
+ routine. Hence, we'll only get one "vector". */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -433,6 +459,9 @@ int main ()
in the following case. So, limit ourselves here. */
if (acc_get_device_type () == acc_device_nvidia)
gangs = 3;
+ /* Similar appears to be true for GCN. */
+ if (acc_get_device_type () == acc_device_radeon)
+ gangs = 3;
int gangs_actual = gangs;
#define WORKERS 3
int workers_actual = WORKERS;
@@ -459,6 +488,13 @@ int main ()
/* The GCC nvptx back end enforces vector_length (32). */
vectors_actual = 32;
}
+ else if (acc_on_device (acc_device_radeon))
+ {
+ /* Temporary setting, until multiple workers are permitted. */
+ workers_actual = 1;
+ /* See above comments about GCN vectors_actual. */
+ vectors_actual = 1;
+ }
else
__builtin_abort ();
#pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
index a97e046..da13d84 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -30,14 +30,18 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int gangsize, workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
gang (ary);
+ gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -45,11 +49,12 @@ int main ()
int expected = ix;
if(ondev)
{
- int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+ int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+ / (gangsize * workersize * vectorsize);
- int g = ix / (chunk_size * 32 * 32);
- int w = ix / 32 % 32;
- int v = ix % 32;
+ int g = ix / (chunk_size * vectorsize * workersize);
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
index b1e3e3a..dd7bb6c 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -30,14 +30,17 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+ copyout(vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
vector (ary);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -47,7 +50,7 @@ int main ()
{
int g = 0;
int w = 0;
- int v = ix % 32;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
index 81f1e03..acd9884 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -30,14 +30,17 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
for (ix = 0; ix < N; ix++)
@@ -46,7 +49,7 @@ int main ()
if(ondev)
{
int g = 0;
- int w = ix % 32;
+ int w = ix % workersize;
int v = 0;
expected = (g << 16) | (w << 8) | v;
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
index 23dbc1a..73696e4 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -30,14 +30,18 @@ int main ()
int ix;
int exit = 0;
int ondev = 0;
+ int workersize, vectorsize;
for (ix = 0; ix < N;ix++)
ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+ copyout(workersize, vectorsize)
{
ondev = acc_on_device (acc_device_not_host);
worker (ary);
+ workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+ vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
for (ix = 0; ix < N; ix++)
@@ -46,8 +50,8 @@ int main ()
if(ondev)
{
int g = 0;
- int w = (ix / 32) % 32;
- int v = ix % 32;
+ int w = (ix / vectorsize) % workersize;
+ int v = ix % vectorsize;
expected = (g << 16) | (w << 8) | v;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
index 8862148..609f9f6 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -2,8 +2,13 @@
#include <openacc.h>
#include <gomp-constants.h>
+#ifdef ACC_DEVICE_TYPE_gcn
+#define NUM_WORKERS 16
+#define NUM_VECTORS 1
+#else
#define NUM_WORKERS 16
#define NUM_VECTORS 32
+#endif
#define WIDTH 64
#define HEIGHT 32