aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-low.c
diff options
context:
space:
mode:
authorJulian Brown <julian@codesourcery.com>2021-02-26 04:34:49 -0800
committerThomas Schwinge <thomas@codesourcery.com>2021-05-21 18:58:07 +0200
commit29a2f51806c5b30e17a8d0e9ba7915a3c53c34ff (patch)
tree0e18184b3d50407c5100b2dcc182134227ceb053 /gcc/omp-low.c
parent782e57f2c0900f3c3bbaec4b367568b6d05236b8 (diff)
downloadgcc-29a2f51806c5b30e17a8d0e9ba7915a3c53c34ff.zip
gcc-29a2f51806c5b30e17a8d0e9ba7915a3c53c34ff.tar.gz
gcc-29a2f51806c5b30e17a8d0e9ba7915a3c53c34ff.tar.bz2
openacc: Add support for gang local storage allocation in shared memory [PR90115]
This patch implements a method to track the "private-ness" of OpenACC variables declared in offload regions in gang-partitioned, worker-partitioned or vector-partitioned modes. Variables declared implicitly in scoped blocks and those declared "private" on enclosing directives (e.g. "acc parallel") are both handled. Variables that are e.g. gang-private can then be adjusted so they reside in GPU shared memory. The reason for doing this is twofold: correct implementation of OpenACC semantics, and optimisation, since shared memory might be faster than the main memory on a GPU. Handling of private variables is intimately tied to the execution model for gangs/workers/vectors implemented by a particular target: for current targets, we use (or on mainline, will soon use) a broadcasting/neutering scheme. That is sufficient for code that e.g. sets a variable in worker-single mode and expects to use the value in worker-partitioned mode. The difficulty (semantics-wise) comes when the user wants to do something like an atomic operation in worker-partitioned mode and expects a worker-single (gang private) variable to be shared across each partitioned worker. Forcing use of shared memory for such variables makes that work properly. In terms of implementation, the parallelism level of a given loop is not fixed until the oaccdevlow pass in the offload compiler, so the patch delays fixing the parallelism level of variables declared on or within such loops until the same point. This is done by adding a new internal UNIQUE function (OACC_PRIVATE) that lists (the address of) each private variable as an argument, and other arguments set so as to be able to determine the correct parallelism level to use for the listed variables. This new internal function fits into the existing scheme for demarcating OpenACC loops, as described in comments in the patch. Two new target hooks are introduced: TARGET_GOACC_ADJUST_PRIVATE_DECL and TARGET_GOACC_EXPAND_VAR_DECL. The first can tweak a variable declaration at oaccdevlow time, and the second at expand time. The first or both of these target hooks can be used by a given offload target, depending on its strategy for implementing private variables. This patch updates the TARGET_GOACC_ADJUST_PRIVATE_DECL target hook in the AMD GCN backend to the current name and prototype. (An earlier version of the hook was already present, but dormant.) gcc/ PR middle-end/90115 * doc/tm.texi.in (TARGET_GOACC_EXPAND_VAR_DECL) (TARGET_GOACC_ADJUST_PRIVATE_DECL): Add documentation hooks. * doc/tm.texi: Regenerate. * expr.c (expand_expr_real_1): Expand decls using the expand_var_decl OpenACC hook if defined. * internal-fn.c (expand_UNIQUE): Handle IFN_UNIQUE_OACC_PRIVATE. * internal-fn.h (IFN_UNIQUE_CODES): Add OACC_PRIVATE. * omp-low.c (omp_context): Add oacc_privatization_candidates field. (lower_oacc_reductions): Add PRIVATE_MARKER parameter. Insert before fork. (lower_oacc_head_tail): Add PRIVATE_MARKER parameter. Modify private marker's gimple call arguments, and pass it to lower_oacc_reductions. (oacc_privatization_scan_clause_chain) (oacc_privatization_scan_decl_chain, lower_oacc_private_marker): New functions. (lower_omp_for, lower_omp_target, lower_omp_1): Use these. * omp-offload.c (convert.h): Include. (oacc_loop_xform_head_tail): Treat private-variable markers like fork/join when transforming head/tail sequences. (struct var_decl_rewrite_info): Add struct. (oacc_rewrite_var_decl, is_sync_builtin_call): New functions. (execute_oacc_device_lower): Support rewriting gang-private variables using target hook, and fix up addr_expr and var_decl nodes afterwards. * target.def (adjust_private_decl, expand_var_decl): New hooks. * config/gcn/gcn-protos.h (gcn_goacc_adjust_gangprivate_decl): Rename to... (gcn_goacc_adjust_private_decl): ...this. * config/gcn/gcn-tree.c (gcn_goacc_adjust_gangprivate_decl): Rename to... (gcn_goacc_adjust_private_decl): ...this. Add LEVEL parameter. * config/gcn/gcn.c (TARGET_GOACC_ADJUST_GANGPRIVATE_DECL): Rename definition using gcn_goacc_adjust_gangprivate_decl... (TARGET_GOACC_ADJUST_PRIVATE_DECL): ...to this, using gcn_goacc_adjust_private_decl. * config/nvptx/nvptx.c (tree-pretty-print.h): Include. (gang_private_shared_size): New global variable. (gang_private_shared_align): Likewise. (gang_private_shared_sym): Likewise. (gang_private_shared_hmap): Likewise. (nvptx_option_override): Initialize these. (nvptx_file_end): Output gang_private_shared_sym. (nvptx_goacc_adjust_private_decl, nvptx_goacc_expand_var_decl): New functions. (nvptx_set_current_function): Clear gang_private_shared_hmap. (TARGET_GOACC_ADJUST_PRIVATE_DECL): Define hook. (TARGET_GOACC_EXPAND_VAR_DECL): Likewise. libgomp/ PR middle-end/90115 * testsuite/libgomp.oacc-c-c++-common/private-atomic-1-gang.c: New test. * testsuite/libgomp.oacc-fortran/private-atomic-1-gang.f90: Likewise. * testsuite/libgomp.oacc-fortran/private-atomic-1-worker.f90: Likewise. Co-Authored-By: Chung-Lin Tang <cltang@codesourcery.com> Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
Diffstat (limited to 'gcc/omp-low.c')
-rw-r--r--gcc/omp-low.c125
1 files changed, 119 insertions, 6 deletions
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d1136d1..da827ef 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -179,6 +179,9 @@ struct omp_context
/* Only used for omp target contexts. True if an OpenMP construct other
than teams is strictly nested in it. */
bool nonteams_nested_p;
+
+ /* Candidates for adjusting OpenACC privatization level. */
+ vec<tree> oacc_privatization_candidates;
};
static splay_tree all_contexts;
@@ -7132,8 +7135,9 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
static void
lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
- gcall *fork, gcall *join, gimple_seq *fork_seq,
- gimple_seq *join_seq, omp_context *ctx)
+ gcall *fork, gcall *private_marker, gcall *join,
+ gimple_seq *fork_seq, gimple_seq *join_seq,
+ omp_context *ctx)
{
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
@@ -7337,6 +7341,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
/* Now stitch things together. */
gimple_seq_add_seq (fork_seq, before_fork);
+ if (private_marker)
+ gimple_seq_add_stmt (fork_seq, private_marker);
if (fork)
gimple_seq_add_stmt (fork_seq, fork);
gimple_seq_add_seq (fork_seq, after_fork);
@@ -8116,7 +8122,7 @@ lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
HEAD and TAIL. */
static void
-lower_oacc_head_tail (location_t loc, tree clauses,
+lower_oacc_head_tail (location_t loc, tree clauses, gcall *private_marker,
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
@@ -8124,6 +8130,14 @@ lower_oacc_head_tail (location_t loc, tree clauses,
gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
+
+ if (private_marker)
+ {
+ gimple_set_location (private_marker, loc);
+ gimple_call_set_lhs (private_marker, ddvar);
+ gimple_call_set_arg (private_marker, 1, ddvar);
+ }
+
tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
@@ -8154,7 +8168,8 @@ lower_oacc_head_tail (location_t loc, tree clauses,
&join_seq);
lower_oacc_reductions (loc, clauses, place, inner,
- fork, join, &fork_seq, &join_seq, ctx);
+ fork, (count == 1) ? private_marker : NULL,
+ join, &fork_seq, &join_seq, ctx);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
@@ -10129,6 +10144,32 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
}
}
+/* Scan CLAUSES for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_clause_chain (omp_context *ctx, tree clauses)
+{
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+ }
+}
+
+/* Scan DECLS for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_decl_chain (omp_context *ctx, tree decls)
+{
+ for (tree decl = decls; decl; decl = DECL_CHAIN (decl))
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+}
+
/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */
static tree
@@ -10958,6 +10999,58 @@ lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
*dlist = new_dlist;
}
+/* Build an internal UNIQUE function with type IFN_UNIQUE_OACC_PRIVATE listing
+ the addresses of variables to be made private at the surrounding
+ parallelism level. Such functions appear in the gimple code stream in two
+ forms, e.g. for a partitioned loop:
+
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6, 1, 68);
+ .data_dep.6 = .UNIQUE (OACC_PRIVATE, .data_dep.6, -1, &w);
+ .data_dep.6 = .UNIQUE (OACC_FORK, .data_dep.6, -1);
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6);
+
+ or alternatively, OACC_PRIVATE can appear at the top level of a parallel,
+ not as part of a HEAD_MARK sequence:
+
+ .UNIQUE (OACC_PRIVATE, 0, 0, &w);
+
+ For such stand-alone appearances, the 3rd argument is always 0, denoting
+ gang partitioning. */
+
+static gcall *
+lower_oacc_private_marker (omp_context *ctx)
+{
+ if (ctx->oacc_privatization_candidates.length () == 0)
+ return NULL;
+
+ auto_vec<tree, 5> args;
+
+ args.quick_push (build_int_cst (integer_type_node, IFN_UNIQUE_OACC_PRIVATE));
+ args.quick_push (integer_zero_node);
+ args.quick_push (integer_minus_one_node);
+
+ int i;
+ tree decl;
+ FOR_EACH_VEC_ELT (ctx->oacc_privatization_candidates, i, decl)
+ {
+ for (omp_context *thisctx = ctx; thisctx; thisctx = thisctx->outer)
+ {
+ tree inner_decl = maybe_lookup_decl (decl, thisctx);
+ if (inner_decl)
+ {
+ decl = inner_decl;
+ break;
+ }
+ }
+ gcc_checking_assert (decl);
+
+ tree addr = build_fold_addr_expr (decl);
+ args.safe_push (addr);
+ }
+
+ return gimple_build_call_internal_vec (IFN_UNIQUE, args);
+}
+
/* Lower code for an OMP loop directive. */
static void
@@ -10974,6 +11067,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
push_gimplify_context ();
+ oacc_privatization_scan_clause_chain (ctx, gimple_omp_for_clauses (stmt));
+
lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
block = make_node (BLOCK);
@@ -10992,6 +11087,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gbind *inner_bind
= as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
tree vars = gimple_bind_vars (inner_bind);
+ if (is_gimple_omp_oacc (ctx->stmt))
+ oacc_privatization_scan_decl_chain (ctx, vars);
gimple_bind_append_vars (new_stmt, vars);
/* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
keep them on the inner_bind and it's block. */
@@ -11105,6 +11202,11 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lower_omp (gimple_omp_body_ptr (stmt), ctx);
+ gcall *private_marker = NULL;
+ if (is_gimple_omp_oacc (ctx->stmt)
+ && !gimple_seq_empty_p (omp_for_body))
+ private_marker = lower_oacc_private_marker (ctx);
+
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
@@ -11159,7 +11261,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (is_gimple_omp_oacc (ctx->stmt)
&& !ctx_in_oacc_kernels_region (ctx))
lower_oacc_head_tail (gimple_location (stmt),
- gimple_omp_for_clauses (stmt),
+ gimple_omp_for_clauses (stmt), private_marker,
&oacc_head, &oacc_tail, ctx);
/* Add OpenACC partitioning and reduction markers just before the loop. */
@@ -13156,8 +13258,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
them as a dummy GANG loop. */
tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
+ gcall *private_marker = lower_oacc_private_marker (ctx);
+
+ if (private_marker)
+ gimple_call_set_arg (private_marker, 2, level);
+
lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
- false, NULL, NULL, &fork_seq, &join_seq, ctx);
+ false, NULL, private_marker, NULL, &fork_seq,
+ &join_seq, ctx);
}
gimple_seq_add_seq (&new_body, fork_seq);
@@ -13399,6 +13507,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
ctx);
break;
case GIMPLE_BIND:
+ if (ctx && is_gimple_omp_oacc (ctx->stmt))
+ {
+ tree vars = gimple_bind_vars (as_a <gbind *> (stmt));
+ oacc_privatization_scan_decl_chain (ctx, vars);
+ }
lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
break;