aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-low.c
diff options
context:
space:
mode:
authorNathan Sidwell <nathan@codesourcery.com>2016-02-01 16:20:13 +0000
committerNathan Sidwell <nathan@gcc.gnu.org>2016-02-01 16:20:13 +0000
commitb6adbb9faabb776ae7b70a5f5943ae883b1f76ea (patch)
tree09305a38116a380d3ddfab6c7c4b51bb2e212610 /gcc/omp-low.c
parentff86345f836c265d6bbb8d1bee5417e6f4c32ac9 (diff)
downloadgcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.zip
gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.gz
gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.bz2
nvptx.c (PTX_GANG_DEFAULT): New.
gcc/ * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New. (nvptx_goacc_validate_dims): Extend to handle global defaults. * target.def (OACC_VALIDATE_DIMS): Extend documentation. * doc/tm.texti: Rebuilt. * doc/invoke.texi (fopenacc-dim): Document. * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case. (append_compiler_options): Likewise. * omp-low.c (oacc_default_dims, oacc_min_dims): New. (oacc_parse_default_dims): New. (oacc_validate_dims): Add USED arg. Select non-unity default when possible. (oacc_loop_fixed_partitions): Return mask of used partitions. (oacc_loop_auto_partitions): Emit dump info. (oacc_loop_partition): Return mask of used partitions. (execute_oacc_device_lower): Parse default dimension arg. Adjust loop partitioning and validation calls. gcc/c-family/ * c.opt (fopenacc-dim=): New option. gcc/fortran/ * lang.opt (fopenacc-dim=): New option. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New. * testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop. From-SVN: r233041
Diffstat (limited to 'gcc/omp-low.c')
-rw-r--r--gcc/omp-low.c152
1 files changed, 128 insertions, 24 deletions
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 677ad64..ec4b4b5 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -20238,13 +20238,80 @@ oacc_xform_loop (gcall *call)
gsi_replace_with_seq (&gsi, seq, true);
}
+/* Default partitioned and minimum partitioned dimensions. */
+
+static int oacc_default_dims[GOMP_DIM_MAX];
+static int oacc_min_dims[GOMP_DIM_MAX];
+
+/* Parse the default dimension parameter. This is a set of
+ :-separated optional compute dimensions. Each specified dimension
+ is a positive integer. When device type support is added, it is
+ planned to be a comma separated list of such compute dimensions,
+ with all but the first prefixed by the colon-terminated device
+ type. */
+
+static void
+oacc_parse_default_dims (const char *dims)
+{
+ int ix;
+
+ for (ix = GOMP_DIM_MAX; ix--;)
+ {
+ oacc_default_dims[ix] = -1;
+ oacc_min_dims[ix] = 1;
+ }
+
+#ifndef ACCEL_COMPILER
+ /* Cannot be overridden on the host. */
+ dims = NULL;
+#endif
+ if (dims)
+ {
+ const char *pos = dims;
+
+ for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
+ {
+ if (ix)
+ {
+ if (*pos != ':')
+ goto malformed;
+ pos++;
+ }
+
+ if (*pos != ':')
+ {
+ long val;
+ const char *eptr;
+
+ errno = 0;
+ val = strtol (pos, CONST_CAST (char **, &eptr), 10);
+ if (errno || val <= 0 || (unsigned)val != val)
+ goto malformed;
+ pos = eptr;
+ oacc_default_dims[ix] = (int)val;
+ }
+ }
+ if (*pos)
+ {
+ malformed:
+ error_at (UNKNOWN_LOCATION,
+ "-fopenacc-dim operand is malformed at '%s'", pos);
+ }
+ }
+
+ /* Allow the backend to validate the dimensions. */
+ targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1);
+ targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2);
+}
+
/* Validate and update the dimensions for offloaded FN. ATTRS is the
raw attribute. DIMS is an array of dimensions, which is filled in.
LEVEL is the partitioning level of a routine, or -1 for an offload
- region itself. */
+ region itself. USED is the mask of partitioned execution in the
+ function. */
static void
-oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
+oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
tree purpose[GOMP_DIM_MAX];
unsigned ix;
@@ -20265,11 +20332,29 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level)
bool changed = targetm.goacc.validate_dims (fn, dims, level);
- /* Default anything left to 1. */
+ /* Default anything left to 1 or a partitioned default. */
for (ix = 0; ix != GOMP_DIM_MAX; ix++)
if (dims[ix] < 0)
{
- dims[ix] = 1;
+ /* The OpenACC spec says 'If the [num_gangs] clause is not
+ specified, an implementation-defined default will be used;
+ the default may depend on the code within the construct.'
+ (2.5.6). Thus an implementation is free to choose
+ non-unity default for a parallel region that doesn't have
+ any gang-partitioned loops. However, it appears that there
+ is a sufficient body of user code that expects non-gang
+ partitioned regions to not execute in gang-redundant mode.
+ So we (a) don't warn about the non-portability and (b) pick
+ the minimum permissible dimension size when there is no
+ partitioned execution. Otherwise we pick the global
+ default for the dimension, which the user can control. The
+ same wording and logic applies to num_workers and
+ vector_length, however the worker- or vector- single
+ execution doesn't have the same impact as gang-redundant
+ execution. (If the minimum gang-level partioning is not 1,
+ the target is probably too confusing.) */
+ dims[ix] = (used & GOMP_DIM_MASK (ix)
+ ? oacc_default_dims[ix] : oacc_min_dims[ix]);
changed = true;
}
@@ -20719,14 +20804,15 @@ oacc_loop_process (oacc_loop *loop)
/* Walk the OpenACC loop heirarchy checking and assigning the
programmer-specified partitionings. OUTER_MASK is the partitioning
- this loop is contained within. Return true if we contain an
- auto-partitionable loop. */
+ this loop is contained within. Return mask of partitioning
+ encountered. If any auto loops are discovered, set GOMP_DIM_MAX
+ bit. */
-static bool
+static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
unsigned this_mask = loop->mask;
- bool has_auto = false;
+ unsigned mask_all = 0;
bool noisy = true;
#ifdef ACCEL_COMPILER
@@ -20760,7 +20846,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
}
if (auto_par && (loop->flags & OLF_INDEPENDENT))
- has_auto = true;
+ mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
}
if (this_mask & outer_mask)
@@ -20814,16 +20900,16 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
}
loop->mask = this_mask;
+ mask_all |= this_mask;
+
+ if (loop->child)
+ mask_all |= oacc_loop_fixed_partitions (loop->child,
+ outer_mask | this_mask);
- if (loop->child
- && oacc_loop_fixed_partitions (loop->child, outer_mask | this_mask))
- has_auto = true;
-
- if (loop->sibling
- && oacc_loop_fixed_partitions (loop->sibling, outer_mask))
- has_auto = true;
+ if (loop->sibling)
+ mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
- return has_auto;
+ return mask_all;
}
/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops.
@@ -20865,6 +20951,11 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
warning_at (loop->loc, 0,
"insufficient partitioning available to parallelize loop");
+ if (dump_file)
+ fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
+ LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
+ this_mask);
+
loop->mask = this_mask;
}
inner_mask |= loop->mask;
@@ -20876,13 +20967,19 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
}
/* Walk the OpenACC loop heirarchy to check and assign partitioning
- axes. */
+ axes. Return mask of partitioning. */
-static void
+static unsigned
oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
{
- if (oacc_loop_fixed_partitions (loop, outer_mask))
- oacc_loop_auto_partitions (loop, outer_mask);
+ unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
+
+ if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
+ {
+ mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
+ mask_all |= oacc_loop_auto_partitions (loop, outer_mask);
+ }
+ return mask_all;
}
/* Default fork/join early expander. Delete the function calls if
@@ -20958,6 +21055,13 @@ execute_oacc_device_lower ()
/* Not an offloaded function. */
return 0;
+ /* Parse the default dim argument exactly once. */
+ if ((const void *)flag_openacc_dims != &flag_openacc_dims)
+ {
+ oacc_parse_default_dims (flag_openacc_dims);
+ flag_openacc_dims = (char *)&flag_openacc_dims;
+ }
+
/* Discover, partition and process the loops. */
oacc_loop *loops = oacc_loop_discovery ();
int fn_level = oacc_fn_attrib_level (attrs);
@@ -20969,10 +21073,10 @@ execute_oacc_device_lower ()
: "Function is routine level %d\n", fn_level);
unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
- oacc_loop_partition (loops, outer_mask);
-
+ unsigned used_mask = oacc_loop_partition (loops, outer_mask);
int dims[GOMP_DIM_MAX];
- oacc_validate_dims (current_function_decl, attrs, dims, fn_level);
+
+ oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);
if (dump_file)
{