diff options
author | Nathan Sidwell <nathan@codesourcery.com> | 2016-02-01 16:20:13 +0000 |
---|---|---|
committer | Nathan Sidwell <nathan@gcc.gnu.org> | 2016-02-01 16:20:13 +0000 |
commit | b6adbb9faabb776ae7b70a5f5943ae883b1f76ea (patch) | |
tree | 09305a38116a380d3ddfab6c7c4b51bb2e212610 /gcc | |
parent | ff86345f836c265d6bbb8d1bee5417e6f4c32ac9 (diff) | |
download | gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.zip gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.gz gcc-b6adbb9faabb776ae7b70a5f5943ae883b1f76ea.tar.bz2 |
nvptx.c (PTX_GANG_DEFAULT): New.
gcc/
* config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New.
(nvptx_goacc_validate_dims): Extend to handle global defaults.
* target.def (OACC_VALIDATE_DIMS): Extend documentation.
* doc/tm.texti: Rebuilt.
* doc/invoke.texi (fopenacc-dim): Document.
* lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case.
(append_compiler_options): Likewise.
* omp-low.c (oacc_default_dims, oacc_min_dims): New.
(oacc_parse_default_dims): New.
(oacc_validate_dims): Add USED arg. Select non-unity default when
possible.
(oacc_loop_fixed_partitions): Return mask of used partitions.
(oacc_loop_auto_partitions): Emit dump info.
(oacc_loop_partition): Return mask of used partitions.
(execute_oacc_device_lower): Parse default dimension arg. Adjust
loop partitioning and validation calls.
gcc/c-family/
* c.opt (fopenacc-dim=): New option.
gcc/fortran/
* lang.opt (fopenacc-dim=): New option.
libgomp/
* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: New.
* testsuite/libgomp.oacc-fortran/routine-7.f90: Serialize loop.
From-SVN: r233041
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 19 | ||||
-rw-r--r-- | gcc/c-family/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/c-family/c.opt | 4 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.c | 23 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 10 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 9 | ||||
-rw-r--r-- | gcc/fortran/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/fortran/lang.opt | 4 | ||||
-rw-r--r-- | gcc/lto-wrapper.c | 16 | ||||
-rw-r--r-- | gcc/omp-low.c | 152 | ||||
-rw-r--r-- | gcc/target.def | 9 |
11 files changed, 213 insertions, 41 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e17d3c5..cb6d8bd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2016-02-01 Nathan Sidwell <nathan@codesourcery.com> + + * config/nvptx/nvptx.c (PTX_GANG_DEFAULT): New. + (nvptx_goacc_validate_dims): Extend to handle global defaults. + * target.def (OACC_VALIDATE_DIMS): Extend documentation. + * doc/tm.texti: Rebuilt. + * doc/invoke.texi (fopenacc-dim): Document. + * lto-wrapper.c (merge_and_complain): Add OPT_fopenacc_dim_ case. + (append_compiler_options): Likewise. + * omp-low.c (oacc_default_dims, oacc_min_dims): New. + (oacc_parse_default_dims): New. + (oacc_validate_dims): Add USED arg. Select non-unity default when + possible. + (oacc_loop_fixed_partitions): Return mask of used partitions. + (oacc_loop_auto_partitions): Emit dump info. + (oacc_loop_partition): Return mask of used partitions. + (execute_oacc_device_lower): Parse default dimension arg. Adjust + loop partitioning and validation calls. + 2016-02-01 Richard Biener <rguenther@suse.de> PR middle-end/69556 diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 8c86989..5bd7d3b 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,7 @@ +2016-02-01 Nathan Sidwell <nathan@codesourcery.com> + + * c.opt (fopenacc-dim=): New option. + 2016-01-27 Ryan Burn <contact@rnburn.com> PR cilkplus/69267 diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 2b6b52d..f243744 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -1372,6 +1372,10 @@ fopenacc C ObjC C++ ObjC++ LTO Var(flag_openacc) Enable OpenACC. +fopenacc-dim= +C ObjC C++ ObjC++ LTO Joined Var(flag_openacc_dims) +Specify default OpenACC compute dimensions. + fopenmp C ObjC C++ ObjC++ LTO Var(flag_openmp) Enable OpenMP (implies -frecursive in Fortran). diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 2d4dad1e..1dadfc5 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -4122,10 +4122,12 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), /* Define dimension sizes for known hardware. */ #define PTX_VECTOR_LENGTH 32 #define PTX_WORKER_LENGTH 32 +#define PTX_GANG_DEFAULT 32 /* Validate compute dimensions of an OpenACC offload or routine, fill in non-unity defaults. FN_LEVEL indicates the level at which a - routine might spawn a loop. It is negative for non-routines. */ + routine might spawn a loop. It is negative for non-routines. If + DECL is null, we are validating the default dimensions. */ static bool nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) @@ -4133,11 +4135,12 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) bool changed = false; /* The vector size must be 32, unless this is a SEQ routine. */ - if (fn_level <= GOMP_DIM_VECTOR + if (fn_level <= GOMP_DIM_VECTOR && fn_level >= -1 + && dims[GOMP_DIM_VECTOR] >= 0 && dims[GOMP_DIM_VECTOR] != PTX_VECTOR_LENGTH) { - if (dims[GOMP_DIM_VECTOR] >= 0 && fn_level < 0) - warning_at (DECL_SOURCE_LOCATION (decl), 0, + if (fn_level < 0 && dims[GOMP_DIM_VECTOR] >= 0) + warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0, dims[GOMP_DIM_VECTOR] ? "using vector_length (%d), ignoring %d" : "using vector_length (%d), ignoring runtime setting", @@ -4149,13 +4152,23 @@ nvptx_goacc_validate_dims (tree decl, int dims[], int fn_level) /* Check the num workers is not too large. */ if (dims[GOMP_DIM_WORKER] > PTX_WORKER_LENGTH) { - warning_at (DECL_SOURCE_LOCATION (decl), 0, + warning_at (decl ? DECL_SOURCE_LOCATION (decl) : UNKNOWN_LOCATION, 0, "using num_workers (%d), ignoring %d", PTX_WORKER_LENGTH, dims[GOMP_DIM_WORKER]); dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH; changed = true; } + if (!decl) + { + dims[GOMP_DIM_VECTOR] = PTX_VECTOR_LENGTH; + if (dims[GOMP_DIM_WORKER] < 0) + dims[GOMP_DIM_WORKER] = PTX_WORKER_LENGTH; + if (dims[GOMP_DIM_GANG] < 0) + dims[GOMP_DIM_GANG] = PTX_GANG_DEFAULT; + changed = true; + } + return changed; } diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index ba0b4b2..fcc404e 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1963,9 +1963,13 @@ Programming Interface v2.0 @w{@uref{http://www.openacc.org/}}. This option implies @option{-pthread}, and thus is only supported on targets that have support for @option{-pthread}. -Note that this is an experimental feature, incomplete, and subject to -change in future versions of GCC. See -@w{@uref{https://gcc.gnu.org/wiki/OpenACC}} for more information. +@item -fopenacc-dim=@var{geom} +@opindex fopenacc-dim +@cindex OpenACC accelerator programming +Specify default compute dimensions for parallel offload regions that do +not explicitly specify. The @var{geom} value is a triple of +':'-separated sizes, in order 'gang', 'worker' and, 'vector'. A size +can be omitted, to use a target-specific default value. @item -fopenmp @opindex fopenmp diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index aae09bf..2392691 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -5767,11 +5767,12 @@ to use it. @deftypefn {Target Hook} bool TARGET_GOACC_VALIDATE_DIMS (tree @var{decl}, int *@var{dims}, int @var{fn_level}) This hook should check the launch dimensions provided for an OpenACC compute region, or routine. Defaulted values are represented as -1 -and non-constant values as 0. The @var{fn_level} is negative for the +and non-constant values as 0. The @var{fn_level} is negative for the function corresponding to the compute region. For a routine is is the -outermost level at which partitioned execution may be spawned. It -should fill in anything that needs to default to non-unity and verify -non-defaults. Diagnostics should be issued as appropriate. Return +outermost level at which partitioned execution may be spawned. The hook +should verify non-default values. If DECL is NULL, global defaults +are being validated and unspecified defaults should be filled in. +Diagnostics should be issued as appropriate. Return true, if changes have been made. You must override this hook to provide dimensions larger than 1. @end deftypefn diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index ccc29c1..36b4ddb 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,7 @@ +2016-02-02 Nathan Sidwell <nathan@codesourcery.com> + + * lang.opt (fopenacc-dim=): New option. + 2016-01-31 Paul Thomas <pault@gcc.gnu.org> PR fortran/67564 diff --git a/gcc/fortran/lang.opt b/gcc/fortran/lang.opt index f368ab8..45428d8 100644 --- a/gcc/fortran/lang.opt +++ b/gcc/fortran/lang.opt @@ -578,6 +578,10 @@ fopenacc Fortran LTO ; Documented in C +fopenacc-dim= +Fortran LTO Joined Var(flag_openacc_dims) +; Documented in C + fopenmp Fortran LTO ; Documented in C diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index e636b8b..ced6f2f 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -287,12 +287,25 @@ merge_and_complain (struct cl_decoded_option **decoded_options, append_option (decoded_options, decoded_options_count, foption); /* -fmath-errno > -fno-math-errno, -fsigned-zeros > -fno-signed-zeros, - -ftrapping-math -> -fno-trapping-math, + -ftrapping-math > -fno-trapping-math, -fwrapv > -fno-wrapv. */ else if (foption->value > (*decoded_options)[j].value) (*decoded_options)[j] = *foption; break; + case OPT_fopenacc_dim_: + /* Append or check identical. */ + for (j = 0; j < *decoded_options_count; ++j) + if ((*decoded_options)[j].opt_index == foption->opt_index) + break; + if (j == *decoded_options_count) + append_option (decoded_options, decoded_options_count, foption); + else if (strcmp ((*decoded_options)[j].arg, foption->arg)) + fatal_error (input_location, + "Option %s with different values", + foption->orig_option_with_args_text); + break; + case OPT_freg_struct_return: case OPT_fpcc_struct_return: case OPT_fshort_double: @@ -506,6 +519,7 @@ append_compiler_options (obstack *argv_obstack, struct cl_decoded_option *opts, case OPT_fwrapv: case OPT_fopenmp: case OPT_fopenacc: + case OPT_fopenacc_dim_: case OPT_fcilkplus: case OPT_ftrapv: case OPT_fstrict_overflow: diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 677ad64..ec4b4b5 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -20238,13 +20238,80 @@ oacc_xform_loop (gcall *call) gsi_replace_with_seq (&gsi, seq, true); } +/* Default partitioned and minimum partitioned dimensions. */ + +static int oacc_default_dims[GOMP_DIM_MAX]; +static int oacc_min_dims[GOMP_DIM_MAX]; + +/* Parse the default dimension parameter. This is a set of + :-separated optional compute dimensions. Each specified dimension + is a positive integer. When device type support is added, it is + planned to be a comma separated list of such compute dimensions, + with all but the first prefixed by the colon-terminated device + type. */ + +static void +oacc_parse_default_dims (const char *dims) +{ + int ix; + + for (ix = GOMP_DIM_MAX; ix--;) + { + oacc_default_dims[ix] = -1; + oacc_min_dims[ix] = 1; + } + +#ifndef ACCEL_COMPILER + /* Cannot be overridden on the host. */ + dims = NULL; +#endif + if (dims) + { + const char *pos = dims; + + for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++) + { + if (ix) + { + if (*pos != ':') + goto malformed; + pos++; + } + + if (*pos != ':') + { + long val; + const char *eptr; + + errno = 0; + val = strtol (pos, CONST_CAST (char **, &eptr), 10); + if (errno || val <= 0 || (unsigned)val != val) + goto malformed; + pos = eptr; + oacc_default_dims[ix] = (int)val; + } + } + if (*pos) + { + malformed: + error_at (UNKNOWN_LOCATION, + "-fopenacc-dim operand is malformed at '%s'", pos); + } + } + + /* Allow the backend to validate the dimensions. */ + targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); + targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); +} + /* Validate and update the dimensions for offloaded FN. ATTRS is the raw attribute. DIMS is an array of dimensions, which is filled in. LEVEL is the partitioning level of a routine, or -1 for an offload - region itself. */ + region itself. USED is the mask of partitioned execution in the + function. */ static void -oacc_validate_dims (tree fn, tree attrs, int *dims, int level) +oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) { tree purpose[GOMP_DIM_MAX]; unsigned ix; @@ -20265,11 +20332,29 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int level) bool changed = targetm.goacc.validate_dims (fn, dims, level); - /* Default anything left to 1. */ + /* Default anything left to 1 or a partitioned default. */ for (ix = 0; ix != GOMP_DIM_MAX; ix++) if (dims[ix] < 0) { - dims[ix] = 1; + /* The OpenACC spec says 'If the [num_gangs] clause is not + specified, an implementation-defined default will be used; + the default may depend on the code within the construct.' + (2.5.6). Thus an implementation is free to choose + non-unity default for a parallel region that doesn't have + any gang-partitioned loops. However, it appears that there + is a sufficient body of user code that expects non-gang + partitioned regions to not execute in gang-redundant mode. + So we (a) don't warn about the non-portability and (b) pick + the minimum permissible dimension size when there is no + partitioned execution. Otherwise we pick the global + default for the dimension, which the user can control. The + same wording and logic applies to num_workers and + vector_length, however the worker- or vector- single + execution doesn't have the same impact as gang-redundant + execution. (If the minimum gang-level partioning is not 1, + the target is probably too confusing.) */ + dims[ix] = (used & GOMP_DIM_MASK (ix) + ? oacc_default_dims[ix] : oacc_min_dims[ix]); changed = true; } @@ -20719,14 +20804,15 @@ oacc_loop_process (oacc_loop *loop) /* Walk the OpenACC loop heirarchy checking and assigning the programmer-specified partitionings. OUTER_MASK is the partitioning - this loop is contained within. Return true if we contain an - auto-partitionable loop. */ + this loop is contained within. Return mask of partitioning + encountered. If any auto loops are discovered, set GOMP_DIM_MAX + bit. */ -static bool +static unsigned oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) { unsigned this_mask = loop->mask; - bool has_auto = false; + unsigned mask_all = 0; bool noisy = true; #ifdef ACCEL_COMPILER @@ -20760,7 +20846,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) } } if (auto_par && (loop->flags & OLF_INDEPENDENT)) - has_auto = true; + mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); } if (this_mask & outer_mask) @@ -20814,16 +20900,16 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) } loop->mask = this_mask; + mask_all |= this_mask; + + if (loop->child) + mask_all |= oacc_loop_fixed_partitions (loop->child, + outer_mask | this_mask); - if (loop->child - && oacc_loop_fixed_partitions (loop->child, outer_mask | this_mask)) - has_auto = true; - - if (loop->sibling - && oacc_loop_fixed_partitions (loop->sibling, outer_mask)) - has_auto = true; + if (loop->sibling) + mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); - return has_auto; + return mask_all; } /* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. @@ -20865,6 +20951,11 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) warning_at (loop->loc, 0, "insufficient partitioning available to parallelize loop"); + if (dump_file) + fprintf (dump_file, "Auto loop %s:%d assigned %d\n", + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc), + this_mask); + loop->mask = this_mask; } inner_mask |= loop->mask; @@ -20876,13 +20967,19 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) } /* Walk the OpenACC loop heirarchy to check and assign partitioning - axes. */ + axes. Return mask of partitioning. */ -static void +static unsigned oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) { - if (oacc_loop_fixed_partitions (loop, outer_mask)) - oacc_loop_auto_partitions (loop, outer_mask); + unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask); + + if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX)) + { + mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX); + mask_all |= oacc_loop_auto_partitions (loop, outer_mask); + } + return mask_all; } /* Default fork/join early expander. Delete the function calls if @@ -20958,6 +21055,13 @@ execute_oacc_device_lower () /* Not an offloaded function. */ return 0; + /* Parse the default dim argument exactly once. */ + if ((const void *)flag_openacc_dims != &flag_openacc_dims) + { + oacc_parse_default_dims (flag_openacc_dims); + flag_openacc_dims = (char *)&flag_openacc_dims; + } + /* Discover, partition and process the loops. */ oacc_loop *loops = oacc_loop_discovery (); int fn_level = oacc_fn_attrib_level (attrs); @@ -20969,10 +21073,10 @@ execute_oacc_device_lower () : "Function is routine level %d\n", fn_level); unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; - oacc_loop_partition (loops, outer_mask); - + unsigned used_mask = oacc_loop_partition (loops, outer_mask); int dims[GOMP_DIM_MAX]; - oacc_validate_dims (current_function_decl, attrs, dims, fn_level); + + oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask); if (dump_file) { diff --git a/gcc/target.def b/gcc/target.def index d60319e..fa0af67 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1648,11 +1648,12 @@ DEFHOOK (validate_dims, "This hook should check the launch dimensions provided for an OpenACC\n\ compute region, or routine. Defaulted values are represented as -1\n\ -and non-constant values as 0. The @var{fn_level} is negative for the\n\ +and non-constant values as 0. The @var{fn_level} is negative for the\n\ function corresponding to the compute region. For a routine is is the\n\ -outermost level at which partitioned execution may be spawned. It\n\ -should fill in anything that needs to default to non-unity and verify\n\ -non-defaults. Diagnostics should be issued as appropriate. Return\n\ +outermost level at which partitioned execution may be spawned. The hook\n\ +should verify non-default values. If DECL is NULL, global defaults\n\ +are being validated and unspecified defaults should be filled in.\n\ +Diagnostics should be issued as appropriate. Return\n\ true, if changes have been made. You must override this hook to\n\ provide dimensions larger than 1.", bool, (tree decl, int *dims, int fn_level), |