aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNathan Sidwell <nathan@codesourcery.com>2016-02-01 16:20:13 +0000
committerNathan Sidwell <nathan@gcc.gnu.org>2016-02-01 16:20:13 +0000
commitb6adbb9faabb776ae7b70a5f5943ae883b1f76ea (patch)
tree09305a38116a380d3ddfab6c7c4b51bb2e212610
parentff86345f836c265d6bbb8d1bee5417e6f4c32ac9 (diff)
downloadgcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.zip
gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.gz
gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.bz2
nvptx.c (PTX_GANG_DEFAULT): New.
gcc/ * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New. (nvptx_goacc_validate_dims): Extend to handle global defaults. * target.def (OACC_VALIDATE_DIMS): Extend documentation. * doc/tm.texti: Rebuilt. * doc/invoke.texi (fopenacc-dim): Document. * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case. (append_compiler_options): Likewise. * omp-low.c (oacc_default_dims, oacc_min_dims): New. (oacc_parse_default_dims): New. (oacc_validate_dims): Add USED arg. Select non-unity default when possible. (oacc_loop_fixed_partitions): Return mask of used partitions. (oacc_loop_auto_partitions): Emit dump info. (oacc_loop_partition): Return mask of used partitions. (execute_oacc_device_lower): Parse default dimension arg. Adjust loop partitioning and validation calls. gcc/c-family/ * c.opt (fopenacc-dim=): New option. gcc/fortran/ * lang.opt (fopenacc-dim=): New option. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New. * testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop. From-SVN: r233041
-rw-r--r--gcc/ChangeLog19
-rw-r--r--gcc/c-family/ChangeLog4
-rw-r--r--gcc/c-family/c.opt4
-rw-r--r--gcc/config/nvptx/nvptx.c23
-rw-r--r--gcc/doc/invoke.texi10
-rw-r--r--gcc/doc/tm.texi9
-rw-r--r--gcc/fortran/ChangeLog4
-rw-r--r--gcc/fortran/lang.opt4
-rw-r--r--gcc/lto-wrapper.c16
-rw-r--r--gcc/omp-low.c152
-rw-r--r--gcc/target.def9
-rw-r--r--libgomp/ChangeLog5
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c133
-rw-r--r--libgomp/testsuite/libgomp.oacc-fortran/routine-7.f904
14 files changed, 353 insertions, 43 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e17d3c5..cb6d8bd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,22 @@
+2016-02-01 Nathan Sidwell <nathan@codesourcery.com>
+
+ * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New.
+ (nvptx_goacc_validate_dims): Extend to handle global defaults.
+ * target.def (OACC_VALIDATE_DIMS): Extend documentation.
+ * doc/tm.texti: Rebuilt.
+ * doc/invoke.texi (fopenacc-dim): Document.
+ * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case.
+ (append_compiler_options): Likewise.
+ * omp-low.c (oacc_default_dims, oacc_min_dims): New.
+ (oacc_parse_default_dims): New.
+ (oacc_validate_dims): Add USED arg. Select non-unity default when
+ possible.
+ (oacc_loop_fixed_partitions): Return mask of used partitions.
+ (oacc_loop_auto_partitions): Emit dump info.
+ (oacc_loop_partition): Return mask of used partitions.
+ (execute_oacc_device_lower): Parse default dimension arg. Adjust
+ loop partitioning and validation calls.
+
2016-02-01 Richard Biener <rguenther@suse.de>
PR middle-end/69556
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 8c86989..5bd7d3b 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,7 @@
+2016-02-01 Nathan Sidwell <nathan@codesourcery.com>
+
+ * c.opt (fopenacc-dim=): New option.
+
2016-01-27 Ryan Burn <contact@rnburn.com>
PR cilkplus/69267
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 2b6b52d..f243744 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1372,6 +1372,10 @@ fopenacc
C ObjC C++ ObjC++ LTO Var(flag_openacc)
Enable OpenACC.
+fopenacc-dim=
+C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims)
+Specify default OpenACC compute dimensions.
+
fopenmp
C ObjC C++ ObjC++ LTO Var(flag_openmp)
Enable OpenMP (implies -frecursive in Fortran).
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 2d4dad1e..1dadfc5 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -4122,10 +4122,12 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
/* Define dimension sizes for known hardware. */
#define PTX_VECTOR_LENGTH 32
#define PTX_WORKER_LENGTH 32
+#define PTX_GANG_DEFAULT 32
/* Validate compute dimensions of an OpenACC offload or routine, fill
in non-unity defaults. FN_LEVEL indicates the level at which a
- routine might spawn a loop. It is negative for non-routines. */
+ routine might spawn a loop. It is negative for non-routines. If
+ DECL is null, we are validating the default dimensions. */
static bool
nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
@@ -4133,11 +4135,12 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
bool changed = false;
/* The vector size must be 32, unless this is a SEQ routine. */
- if (fn_level <= GOMP_DIM_VECTOR
+ if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1
+ && dims[GOMP_DIM_VECTOR] >= 0
&& dims[GOMP_DIM_VECTOR] != PTX_VECTOR_LENGTH)
{
- if (dims[GOMP_DIM_VECTOR] >= 0 && fn_level < 0)
- warning_at (DECL_SOURCE_LOCATION (decl), 0,
+ if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0)
+ warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0,
dims[GOMP_DIM_VECTOR]
? "using vector_length (%d), ignoring %d"
: "using vector_length (%d), ignoring runtime setting",
@@ -4149,13 +4152,23 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level)
/* Check the num workers is not too large. */
if (dims[GOMP_DIM_WORKER] > PTX_WORKER_LENGTH)
{
- warning_at (DECL_SOURCE_LOCATION (decl), 0,
+ warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0,
"using num_workers (%d), ignoring %d",
PTX_WORKER_LENGTH, dims[GOMP_DIM_WORKER]);
dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH;
changed = true;
}
+ if (!decl)
+ {
+ dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH;
+ if (dims[GOMP_DIM_WORKER] < 0)
+ dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH;
+ if (dims[GOMP_DIM_GANG] < 0)
+ dims[GOMP_DIM_GANG] = PTX_GANG_DEFAULT;
+ changed = true;
+ }
+
return changed;
}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index ba0b4b2..fcc404e 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1963,9 +1963,13 @@ Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. This option
implies @option{-pthread}, and thus is only supported on targets that
have support for @option{-pthread}.
-Note that this is an experimental feature, incomplete, and subject to
-change in future versions of GCC. See
-@w{@uref{https://gcc.gnu.org/wiki/OpenACC}} for more information.
+@item -fopenacc-dim=@var{geom}
+@opindex fopenacc-dim
+@cindex OpenACC accelerator programming
+Specify default compute dimensions for parallel offload regions that do
+not explicitly specify. The @var{geom} value is a triple of
+':'-separated sizes, in order 'gang', 'worker' and, 'vector'. A size
+can be omitted, to use a target-specific default value.
@item -fopenmp
@opindex fopenmp
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index aae09bf..2392691 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5767,11 +5767,12 @@ to use it.
@deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int *@var{dims}, int @var{fn_level})
This hook should check the launch dimensions provided for an OpenACC
compute region, or routine. Defaulted values are represented as -1
-and non-constant values as 0. The @var{fn_level} is negative for the
+and non-constant values as 0. The @var{fn_level} is negative for the
function corresponding to the compute region. For a routine is is the
-outermost level at which partitioned execution may be spawned. It
-should fill in anything that needs to default to non-unity and verify
-non-defaults. Diagnostics should be issued as appropriate. Return
+outermost level at which partitioned execution may be spawned. The hook
+should verify non-default values. If DECL is NULL, global defaults
+are being validated and unspecified defaults should be filled in.
+Diagnostics should be issued as appropriate. Return
true, if changes have been made. You must override this hook to
provide dimensions larger than 1.
@end deftypefn
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index ccc29c1..36b4ddb 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,7 @@
+2016-02-02 Nathan Sidwell <nathan@codesourcery.com>
+
+ * lang.opt (fopenacc-dim=): New option.
+
2016-01-31 Paul Thomas <pault@gcc.gnu.org>
PR fortran/67564
diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt
index f368ab8..45428d8 100644
--- a/gcc/fortran/lang.opt
+++ b/gcc/fortran/lang.opt
@@ -578,6 +578,10 @@ fopenacc
Fortran LTO
; Documented in C
+fopenacc-dim=
+Fortran LTO Joined Var(flag_openacc_dims)
+; Documented in C
+
fopenmp
Fortran LTO
; Documented in C
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index e636b8b..ced6f2f 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -287,12 +287,25 @@ merge_and_complain (struct cl_decoded_option **decoded_options,
append_option (decoded_options, decoded_options_count, foption);
/* -fmath-errno > -fno-math-errno,
-fsigned-zeros > -fno-signed-zeros,
- -ftrapping-math -> -fno-trapping-math,
+ -ftrapping-math > -fno-trapping-math,
-fwrapv > -fno-wrapv. */
else if (foption->value > (*decoded_options)[j].value)
(*decoded_options)[j] = *foption;
break;
+ case OPT_fopenacc_dim_:
+ /* Append or check identical. */
+ for (j = 0; j < *decoded_options_count; ++j)
+ if ((*decoded_options)[j].opt_index == foption->opt_index)
+ break;
+ if (j == *decoded_options_count)
+ append_option (decoded_options, decoded_options_count, foption);
+ else if (strcmp ((*decoded_options)[j].arg, foption->arg))
+ fatal_error (input_location,
+ "Option %s with different values",
+ foption->orig_option_with_args_text);
+ break;
+
case OPT_freg_struct_return:
case OPT_fpcc_struct_return:
case OPT_fshort_double:
@@ -506,6 +519,7 @@ append_compiler_options (obstack *argv_obstack, struct cl_decoded_option *opts,
case OPT_fwrapv:
case OPT_fopenmp:
case OPT_fopenacc:
+ case OPT_fopenacc_dim_:
case OPT_fcilkplus:
case OPT_ftrapv:
case OPT_fstrict_overflow:
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 677ad64..ec4b4b5 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -20238,13 +20238,80 @@ oacc_xform_loop (gcall *call)
gsi_replace_with_seq (&gsi, seq, true);
}
+/* Default partitioned and minimum partitioned dimensions. */
+
+static int oacc_default_dims[GOMP_DIM_MAX];
+static int oacc_min_dims[GOMP_DIM_MAX];
+
+/* Parse the default dimension parameter. This is a set of
+ :-separated optional compute dimensions. Each specified dimension
+ is a positive integer. When device type support is added, it is
+ planned to be a comma separated list of such compute dimensions,
+ with all but the first prefixed by the colon-terminated device
+ type. */
+
+static void
+oacc_parse_default_dims (const char *dims)
+{
+ int ix;
+
+ for (ix = GOMP_DIM_MAX; ix--;)
+ {
+ oacc_default_dims[ix] = -1;
+ oacc_min_dims[ix] = 1;
+ }
+
+#ifndef ACCEL_COMPILER
+ /* Cannot be overridden on the host. */
+ dims = NULL;
+#endif
+ if (dims)
+ {
+ const char *pos = dims;
+
+ for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
+ {
+ if (ix)
+ {
+ if (*pos != ':')
+ goto malformed;
+ pos++;
+ }
+
+ if (*pos != ':')
+ {
+ long val;
+ const char *eptr;
+
+ errno = 0;
+ val = strtol (pos, CONST_CAST (char **, &eptr), 10);
+ if (errno || val <= 0 || (unsigned)val != val)
+ goto malformed;
+ pos = eptr;
+ oacc_default_dims[ix] = (int)val;
+ }
+ }
+ if (*pos)
+ {
+ malformed:
+ error_at (UNKNOWN_LOCATION,
+ "-fopenacc-dim operand is malformed at '%s'", pos);
+ }
+ }
+
+ /* Allow the backend to validate the dimensions. */
+ targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
+ targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
+}
+
/* Validate and update the dimensions for offloaded FN. ATTRS is the
raw attribute. DIMS is an array of dimensions, which is filled in.
LEVEL is the partitioning level of a routine, or -1 for an offload
- region itself. */
+ region itself. USED is the mask of partitioned execution in the
+ function. */
static void
-oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
tree purpose[GOMP_DIM_MAX];
unsigned ix;
@@ -20265,11 +20332,29 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
bool changed = targetm.goacc.validate_dims (fn, dims, level);
- /* Default anything left to 1. */
+ /* Default anything left to 1 or a partitioned default. */
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
if (dims[ix] < 0)
{
- dims[ix] = 1;
+ /* The OpenACC spec says 'If the [num_gangs] clause is not
+ specified, an implementation-defined default will be used;
+ the default may depend on the code within the construct.'
+ (2.5.6). Thus an implementation is free to choose
+ non-unity default for a parallel region that doesn't have
+ any gang-partitioned loops. However, it appears that there
+ is a sufficient body of user code that expects non-gang
+ partitioned regions to not execute in gang-redundant mode.
+ So we (a) don't warn about the non-portability and (b) pick
+ the minimum permissible dimension size when there is no
+ partitioned execution. Otherwise we pick the global
+ default for the dimension, which the user can control. The
+ same wording and logic applies to num_workers and
+ vector_length, however the worker- or vector- single
+ execution doesn't have the same impact as gang-redundant
+ execution. (If the minimum gang-level partioning is not 1,
+ the target is probably too confusing.) */
+ dims[ix] = (used & GOMP_DIM_MASK (ix)
+ ? oacc_default_dims[ix] : oacc_min_dims[ix]);
changed = true;
}
@@ -20719,14 +20804,15 @@ oacc_loop_process (oacc_loop *loop)
/* Walk the OpenACC loop heirarchy checking and assigning the
programmer-specified partitionings. OUTER_MASK is the partitioning
- this loop is contained within. Return true if we contain an
- auto-partitionable loop. */
+ this loop is contained within. Return mask of partitioning
+ encountered. If any auto loops are discovered, set GOMP_DIM_MAX
+ bit. */
-static bool
+static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
unsigned this_mask = loop->mask;
- bool has_auto = false;
+ unsigned mask_all = 0;
bool noisy = true;
#ifdef ACCEL_COMPILER
@@ -20760,7 +20846,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
}
if (auto_par && (loop->flags & OLF_INDEPENDENT))
- has_auto = true;
+ mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
}
if (this_mask & outer_mask)
@@ -20814,16 +20900,16 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
loop->mask = this_mask;
+ mask_all |= this_mask;
+
+ if (loop->child)
+ mask_all |= oacc_loop_fixed_partitions (loop->child,
+ outer_mask | this_mask);
- if (loop->child
- && oacc_loop_fixed_partitions (loop->child, outer_mask | this_mask))
- has_auto = true;
-
- if (loop->sibling
- && oacc_loop_fixed_partitions (loop->sibling, outer_mask))
- has_auto = true;
+ if (loop->sibling)
+ mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
- return has_auto;
+ return mask_all;
}
/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
@@ -20865,6 +20951,11 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
warning_at (loop->loc, 0,
"insufficient partitioning available to parallelize loop");
+ if (dump_file)
+ fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
+ this_mask);
+
loop->mask = this_mask;
}
inner_mask |= loop->mask;
@@ -20876,13 +20967,19 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
}
/* Walk the OpenACC loop heirarchy to check and assign partitioning
- axes. */
+ axes. Return mask of partitioning. */
-static void
+static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
- if (oacc_loop_fixed_partitions (loop, outer_mask))
- oacc_loop_auto_partitions (loop, outer_mask);
+ unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
+
+ if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
+ {
+ mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
+ mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
+ }
+ return mask_all;
}
/* Default fork/join early expander. Delete the function calls if
@@ -20958,6 +21055,13 @@ execute_oacc_device_lower ()
/* Not an offloaded function. */
return 0;
+ /* Parse the default dim argument exactly once. */
+ if ((const void *)flag_openacc_dims != &flag_openacc_dims)
+ {
+ oacc_parse_default_dims (flag_openacc_dims);
+ flag_openacc_dims = (char *)&flag_openacc_dims;
+ }
+
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
int fn_level = oacc_fn_attrib_level (attrs);
@@ -20969,10 +21073,10 @@ execute_oacc_device_lower ()
: "Function is routine level %d\n", fn_level);
unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
- oacc_loop_partition (loops, outer_mask);
-
+ unsigned used_mask = oacc_loop_partition (loops, outer_mask);
int dims[GOMP_DIM_MAX];
- oacc_validate_dims (current_function_decl, attrs, dims, fn_level);
+
+ oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
if (dump_file)
{
diff --git a/gcc/target.def b/gcc/target.def
index d60319e4..fa0af67 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1648,11 +1648,12 @@ DEFHOOK
(validate_dims,
"This hook should check the launch dimensions provided for an OpenACC\n\
compute region, or routine. Defaulted values are represented as -1\n\
-and non-constant values as 0. The @var{fn_level} is negative for the\n\
+and non-constant values as 0. The @var{fn_level} is negative for the\n\
function corresponding to the compute region. For a routine is is the\n\
-outermost level at which partitioned execution may be spawned. It\n\
-should fill in anything that needs to default to non-unity and verify\n\
-non-defaults. Diagnostics should be issued as appropriate. Return\n\
+outermost level at which partitioned execution may be spawned. The hook\n\
+should verify non-default values. If DECL is NULL, global defaults\n\
+are being validated and unspecified defaults should be filled in.\n\
+Diagnostics should be issued as appropriate. Return\n\
true, if changes have been made. You must override this hook to\n\
provide dimensions larger than 1.",
bool, (tree decl, int *dims, int fn_level),
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 9c09ac5..c94fa27 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2016-02-01 Nathan Sidwell <nathan@codesourcery.com>
+
+ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New.
+ * testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop.
+
2016-01-26 Tom de Vries <tom@codesourcery.com>
PR tree-optimization/69110
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
new file mode 100644
index 0000000..36b882f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -0,0 +1,133 @@
+
+/* { dg-additional-options "-O2 -fopenacc-dim=16:16" } */
+
+#include <openacc.h>
+#include <alloca.h>
+#include <string.h>
+#include <stdio.h>
+
+#pragma acc routine
+static int __attribute__ ((noinline)) coord ()
+{
+ int res = 0;
+
+ if (acc_on_device (acc_device_nvidia))
+ {
+ int g = 0, w = 0, v = 0;
+
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ res = (1 << 24) | (g << 16) | (w << 8) | v;
+ }
+ return res;
+}
+
+
+int check (const int *ary, int size, int gp, int wp, int vp)
+{
+ int exit = 0;
+ int ix;
+ int *gangs = (int *)alloca (gp * sizeof (int));
+ int *workers = (int *)alloca (wp * sizeof (int));
+ int *vectors = (int *)alloca (vp * sizeof (int));
+ int offloaded = 0;
+
+ memset (gangs, 0, gp * sizeof (int));
+ memset (workers, 0, wp * sizeof (int));
+ memset (vectors, 0, vp * sizeof (int));
+
+ for (ix = 0; ix < size; ix++)
+ {
+ int g = (ary[ix] >> 16) & 0xff;
+ int w = (ary[ix] >> 8) & 0xff;
+ int v = (ary[ix] >> 0) & 0xff;
+
+ if (g >= gp || w >= wp || v >= vp)
+ {
+ printf ("unexpected cpu %#x used\n", ary[ix]);
+ exit = 1;
+ }
+ else
+ {
+ vectors[v]++;
+ workers[w]++;
+ gangs[g]++;
+ }
+ offloaded += ary[ix] >> 24;
+ }
+
+ if (!offloaded)
+ return 0;
+
+ if (offloaded != size)
+ {
+ printf ("offloaded %d times, expected %d\n", offloaded, size);
+ return 1;
+ }
+
+ for (ix = 0; ix < gp; ix++)
+ if (gangs[ix] != gangs[0])
+ {
+ printf ("gang %d not used %d times\n", ix, gangs[0]);
+ exit = 1;
+ }
+
+ for (ix = 0; ix < wp; ix++)
+ if (workers[ix] != workers[0])
+ {
+ printf ("worker %d not used %d times\n", ix, workers[0]);
+ exit = 1;
+ }
+
+ for (ix = 0; ix < vp; ix++)
+ if (vectors[ix] != vectors[0])
+ {
+ printf ("vector %d not used %d times\n", ix, vectors[0]);
+ exit = 1;
+ }
+
+ return exit;
+}
+
+#define N (32 *32*32)
+
+int test_1 (int gp, int wp, int vp)
+{
+ int ary[N];
+ int exit = 0;
+
+#pragma acc parallel copyout (ary)
+ {
+#pragma acc loop gang (static:1)
+ for (int ix = 0; ix < N; ix++)
+ ary[ix] = coord ();
+ }
+
+ exit |= check (ary, N, gp, 1, 1);
+
+#pragma acc parallel copyout (ary)
+ {
+#pragma acc loop worker
+ for (int ix = 0; ix < N; ix++)
+ ary[ix] = coord ();
+ }
+
+ exit |= check (ary, N, 1, wp, 1);
+
+#pragma acc parallel copyout (ary)
+ {
+#pragma acc loop vector
+ for (int ix = 0; ix < N; ix++)
+ ary[ix] = coord ();
+ }
+
+ exit |= check (ary, N, 1, 1, vp);
+
+ return exit;
+}
+
+int main ()
+{
+ return test_1 (16, 16, 32);
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
index 7fc8169..200188e 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/routine-7.f90
@@ -41,7 +41,7 @@ program main
end do
!$acc parallel copy (b)
- !$acc loop
+ !$acc loop seq
do i = 1, N
call worker (b)
end do
@@ -56,7 +56,7 @@ program main
end do
!$acc parallel copy (a)
- !$acc loop
+ !$acc loop seq
do i = 1, N
call vector (a)
end do