aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/gcn/gcn-protos.h2
-rw-r--r--gcc/config/gcn/gcn-tree.c9
-rw-r--r--gcc/config/gcn/gcn.c4
-rw-r--r--gcc/config/nvptx/nvptx.c80
-rw-r--r--gcc/doc/tm.texi25
-rw-r--r--gcc/doc/tm.texi.in4
-rw-r--r--gcc/expr.c13
-rw-r--r--gcc/internal-fn.c2
-rw-r--r--gcc/internal-fn.h8
-rw-r--r--gcc/omp-low.c125
-rw-r--r--gcc/omp-offload.c225
-rw-r--r--gcc/target.def29
12 files changed, 511 insertions, 15 deletions
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index dc9331c..7ef7ae8 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -40,7 +40,7 @@ extern rtx gcn_gen_undef (machine_mode);
extern bool gcn_global_address_p (rtx);
extern tree gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
const char *name);
-extern void gcn_goacc_adjust_gangprivate_decl (tree var);
+extern tree gcn_goacc_adjust_private_decl (tree var, int level);
extern void gcn_goacc_reduction (gcall *call);
extern bool gcn_hard_regno_rename_ok (unsigned int from_reg,
unsigned int to_reg);
diff --git a/gcc/config/gcn/gcn-tree.c b/gcc/config/gcn/gcn-tree.c
index 8f27099..75ea50c 100644
--- a/gcc/config/gcn/gcn-tree.c
+++ b/gcc/config/gcn/gcn-tree.c
@@ -577,9 +577,12 @@ gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
return decl;
}
-void
-gcn_goacc_adjust_gangprivate_decl (tree var)
+tree
+gcn_goacc_adjust_private_decl (tree var, int level)
{
+ if (level != GOMP_DIM_GANG)
+ return var;
+
tree type = TREE_TYPE (var);
tree lds_type = build_qualified_type (type,
TYPE_QUALS_NO_ADDR_SPACE (type)
@@ -597,6 +600,8 @@ gcn_goacc_adjust_gangprivate_decl (tree var)
if (machfun)
machfun->use_flat_addressing = true;
+
+ return var;
}
/* }}} */
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 9660ca6..283a91f 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -6320,8 +6320,8 @@ gcn_dwarf_register_span (rtx rtl)
#undef TARGET_GOACC_ADJUST_PROPAGATION_RECORD
#define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
gcn_goacc_adjust_propagation_record
-#undef TARGET_GOACC_ADJUST_GANGPRIVATE_DECL
-#define TARGET_GOACC_ADJUST_GANGPRIVATE_DECL gcn_goacc_adjust_gangprivate_decl
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL gcn_goacc_adjust_private_decl
#undef TARGET_GOACC_FORK_JOIN
#define TARGET_GOACC_FORK_JOIN gcn_fork_join
#undef TARGET_GOACC_REDUCTION
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 722b0fa..80116e5 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -75,6 +75,7 @@
#include "fold-const.h"
#include "intl.h"
#include "opts.h"
+#include "tree-pretty-print.h"
/* This file should be included last. */
#include "target-def.h"
@@ -167,6 +168,12 @@ static unsigned vector_red_align;
static unsigned vector_red_partition;
static GTY(()) rtx vector_red_sym;
+/* Shared memory block for gang-private variables. */
+static unsigned gang_private_shared_size;
+static unsigned gang_private_shared_align;
+static GTY(()) rtx gang_private_shared_sym;
+static hash_map<tree_decl_hash, unsigned int> gang_private_shared_hmap;
+
/* Global lock variable, needed for 128bit worker & gang reductions. */
static GTY(()) tree global_lock_var;
@@ -251,6 +258,10 @@ nvptx_option_override (void)
vector_red_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
vector_red_partition = 0;
+ gang_private_shared_sym = gen_rtx_SYMBOL_REF (Pmode, "__gang_private_shared");
+ SET_SYMBOL_DATA_AREA (gang_private_shared_sym, DATA_AREA_SHARED);
+ gang_private_shared_align = GET_MODE_ALIGNMENT (SImode) / BITS_PER_UNIT;
+
diagnose_openacc_conflict (TARGET_GOMP, "-mgomp");
diagnose_openacc_conflict (TARGET_SOFT_STACK, "-msoft-stack");
diagnose_openacc_conflict (TARGET_UNIFORM_SIMT, "-muniform-simt");
@@ -5435,6 +5446,10 @@ nvptx_file_end (void)
write_shared_buffer (asm_out_file, vector_red_sym,
vector_red_align, vector_red_size);
+ if (gang_private_shared_size)
+ write_shared_buffer (asm_out_file, gang_private_shared_sym,
+ gang_private_shared_align, gang_private_shared_size);
+
if (need_softstack_decl)
{
write_var_marker (asm_out_file, false, true, "__nvptx_stacks");
@@ -6662,6 +6677,64 @@ nvptx_truly_noop_truncation (poly_uint64, poly_uint64)
return false;
}
+/* Implement TARGET_GOACC_ADJUST_PRIVATE_DECL. */
+
+static tree
+nvptx_goacc_adjust_private_decl (tree decl, int level)
+{
+ if (level != GOMP_DIM_GANG)
+ return decl;
+
+ /* Set "oacc gang-private" attribute for gang-private variable
+ declarations. */
+ if (!lookup_attribute ("oacc gang-private", DECL_ATTRIBUTES (decl)))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "Setting 'oacc gang-private' attribute for decl:");
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ tree id = get_identifier ("oacc gang-private");
+ DECL_ATTRIBUTES (decl) = tree_cons (id, NULL, DECL_ATTRIBUTES (decl));
+ }
+
+ return decl;
+}
+
+/* Implement TARGET_GOACC_EXPAND_VAR_DECL. */
+
+static rtx
+nvptx_goacc_expand_var_decl (tree var)
+{
+ /* Place "oacc gang-private" variables in shared memory. */
+ if (VAR_P (var)
+ && lookup_attribute ("oacc gang-private", DECL_ATTRIBUTES (var)))
+ {
+ unsigned int offset, *poffset;
+ poffset = gang_private_shared_hmap.get (var);
+ if (poffset)
+ offset = *poffset;
+ else
+ {
+ unsigned HOST_WIDE_INT align = DECL_ALIGN (var);
+ gang_private_shared_size
+ = (gang_private_shared_size + align - 1) & ~(align - 1);
+ if (gang_private_shared_align < align)
+ gang_private_shared_align = align;
+
+ offset = gang_private_shared_size;
+ bool existed = gang_private_shared_hmap.put (var, offset);
+ gcc_checking_assert (!existed);
+ gang_private_shared_size += tree_to_uhwi (DECL_SIZE_UNIT (var));
+ }
+ rtx addr = plus_constant (Pmode, gang_private_shared_sym, offset);
+ return gen_rtx_MEM (TYPE_MODE (TREE_TYPE (var)), addr);
+ }
+
+ return NULL_RTX;
+}
+
static GTY(()) tree nvptx_previous_fndecl;
static void
@@ -6670,6 +6743,7 @@ nvptx_set_current_function (tree fndecl)
if (!fndecl || fndecl == nvptx_previous_fndecl)
return;
+ gang_private_shared_hmap.empty ();
nvptx_previous_fndecl = fndecl;
vector_red_partition = 0;
oacc_bcast_partition = 0;
@@ -6834,6 +6908,12 @@ nvptx_libc_has_function (enum function_class fn_class, tree type)
#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+#undef TARGET_GOACC_ADJUST_PRIVATE_DECL
+#define TARGET_GOACC_ADJUST_PRIVATE_DECL nvptx_goacc_adjust_private_decl
+
+#undef TARGET_GOACC_EXPAND_VAR_DECL
+#define TARGET_GOACC_EXPAND_VAR_DECL nvptx_goacc_expand_var_decl
+
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION nvptx_set_current_function
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 85ea939..78c330c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6236,6 +6236,31 @@ like @code{cond_add@var{m}}. The default implementation returns a zero
constant of type @var{type}.
@end deftypefn
+@deftypefn {Target Hook} tree TARGET_GOACC_ADJUST_PRIVATE_DECL (tree @var{var}, int @var{level})
+This hook, if defined, is used by accelerator target back-ends to adjust
+OpenACC variable declarations that should be made private to the given
+parallelism level (i.e. @code{GOMP_DIM_GANG}, @code{GOMP_DIM_WORKER} or
+@code{GOMP_DIM_VECTOR}). A typical use for this hook is to force variable
+declarations at the @code{gang} level to reside in GPU shared memory.
+
+You may also use the @code{TARGET_GOACC_EXPAND_VAR_DECL} hook if the
+adjusted variable declaration needs to be expanded to RTL in a non-standard
+way.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_VAR_DECL (tree @var{var})
+This hook, if defined, is used by accelerator target back-ends to expand
+specially handled kinds of @code{VAR_DECL} expressions. A particular use is
+to place variables with specific attributes inside special accelarator
+memories. A return value of @code{NULL} indicates that the target does not
+handle this @code{VAR_DECL}, and normal RTL expanding is resumed.
+
+Only define this hook if your accelerator target needs to expand certain
+@code{VAR_DECL} nodes in a way that differs from the default. You can also adjust
+private variables at OpenACC device-lowering time using the
+@code{TARGET_GOACC_ADJUST_PRIVATE_DECL} target hook.
+@end deftypefn
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d8e3de1..d9fbbe2 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4221,6 +4221,10 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_PREFERRED_ELSE_VALUE
+@hook TARGET_GOACC_ADJUST_PRIVATE_DECL
+
+@hook TARGET_GOACC_EXPAND_VAR_DECL
+
@node Anchored Addresses
@section Anchored Addresses
@cindex anchored addresses
diff --git a/gcc/expr.c b/gcc/expr.c
index ba61eb9..e4660f0 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -10419,8 +10419,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
exp = SSA_NAME_VAR (ssa_name);
goto expand_decl_rtl;
- case PARM_DECL:
case VAR_DECL:
+ /* Allow accel compiler to handle variables that require special
+ treatment, e.g. if they have been modified in some way earlier in
+ compilation by the adjust_private_decl OpenACC hook. */
+ if (flag_openacc && targetm.goacc.expand_var_decl)
+ {
+ temp = targetm.goacc.expand_var_decl (exp);
+ if (temp)
+ return temp;
+ }
+ /* ... fall through ... */
+
+ case PARM_DECL:
/* If a static var's type was incomplete when the decl was written,
but the type is complete now, lay out the decl now. */
if (DECL_SIZE (exp) == 0
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index d209a52..d92080c 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -2969,6 +2969,8 @@ expand_UNIQUE (internal_fn, gcall *stmt)
else
gcc_unreachable ();
break;
+ case IFN_UNIQUE_OACC_PRIVATE:
+ break;
}
if (pattern)
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index c6599ce..5bc5660 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -32,11 +32,15 @@ along with GCC; see the file COPYING3. If not see
or leaving partitioned execution.
DEP_VAR = UNIQUE ({HEAD,TAIL}_MARK, REMAINING_MARKS, ...PRIMARY_FLAGS)
- The PRIMARY_FLAGS only occur on the first HEAD_MARK of a sequence. */
+ The PRIMARY_FLAGS only occur on the first HEAD_MARK of a sequence.
+
+ PRIVATE captures variables to be made private at the surrounding parallelism
+ level. */
#define IFN_UNIQUE_CODES \
DEF(UNSPEC), \
DEF(OACC_FORK), DEF(OACC_JOIN), \
- DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK)
+ DEF(OACC_HEAD_MARK), DEF(OACC_TAIL_MARK), \
+ DEF(OACC_PRIVATE)
enum ifn_unique_kind {
#define DEF(X) IFN_UNIQUE_##X
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d1136d1..da827ef 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -179,6 +179,9 @@ struct omp_context
/* Only used for omp target contexts. True if an OpenMP construct other
than teams is strictly nested in it. */
bool nonteams_nested_p;
+
+ /* Candidates for adjusting OpenACC privatization level. */
+ vec<tree> oacc_privatization_candidates;
};
static splay_tree all_contexts;
@@ -7132,8 +7135,9 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *body_p,
static void
lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
- gcall *fork, gcall *join, gimple_seq *fork_seq,
- gimple_seq *join_seq, omp_context *ctx)
+ gcall *fork, gcall *private_marker, gcall *join,
+ gimple_seq *fork_seq, gimple_seq *join_seq,
+ omp_context *ctx)
{
gimple_seq before_fork = NULL;
gimple_seq after_fork = NULL;
@@ -7337,6 +7341,8 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
/* Now stitch things together. */
gimple_seq_add_seq (fork_seq, before_fork);
+ if (private_marker)
+ gimple_seq_add_stmt (fork_seq, private_marker);
if (fork)
gimple_seq_add_stmt (fork_seq, fork);
gimple_seq_add_seq (fork_seq, after_fork);
@@ -8116,7 +8122,7 @@ lower_oacc_loop_marker (location_t loc, tree ddvar, bool head,
HEAD and TAIL. */
static void
-lower_oacc_head_tail (location_t loc, tree clauses,
+lower_oacc_head_tail (location_t loc, tree clauses, gcall *private_marker,
gimple_seq *head, gimple_seq *tail, omp_context *ctx)
{
bool inner = false;
@@ -8124,6 +8130,14 @@ lower_oacc_head_tail (location_t loc, tree clauses,
gimple_seq_add_stmt (head, gimple_build_assign (ddvar, integer_zero_node));
unsigned count = lower_oacc_head_mark (loc, ddvar, clauses, head, ctx);
+
+ if (private_marker)
+ {
+ gimple_set_location (private_marker, loc);
+ gimple_call_set_lhs (private_marker, ddvar);
+ gimple_call_set_arg (private_marker, 1, ddvar);
+ }
+
tree fork_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_FORK);
tree join_kind = build_int_cst (unsigned_type_node, IFN_UNIQUE_OACC_JOIN);
@@ -8154,7 +8168,8 @@ lower_oacc_head_tail (location_t loc, tree clauses,
&join_seq);
lower_oacc_reductions (loc, clauses, place, inner,
- fork, join, &fork_seq, &join_seq, ctx);
+ fork, (count == 1) ? private_marker : NULL,
+ join, &fork_seq, &join_seq, ctx);
/* Append this level to head. */
gimple_seq_add_seq (head, fork_seq);
@@ -10129,6 +10144,32 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p,
}
}
+/* Scan CLAUSES for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_clause_chain (omp_context *ctx, tree clauses)
+{
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
+ {
+ tree decl = OMP_CLAUSE_DECL (c);
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+ }
+}
+
+/* Scan DECLS for candidates for adjusting OpenACC privatization level in
+ CTX. */
+
+static void
+oacc_privatization_scan_decl_chain (omp_context *ctx, tree decls)
+{
+ for (tree decl = decls; decl; decl = DECL_CHAIN (decl))
+ if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
+ ctx->oacc_privatization_candidates.safe_push (decl);
+}
+
/* Callback for walk_gimple_seq. Find #pragma omp scan statement. */
static tree
@@ -10958,6 +10999,58 @@ lower_omp_for_scan (gimple_seq *body_p, gimple_seq *dlist, gomp_for *stmt,
*dlist = new_dlist;
}
+/* Build an internal UNIQUE function with type IFN_UNIQUE_OACC_PRIVATE listing
+ the addresses of variables to be made private at the surrounding
+ parallelism level. Such functions appear in the gimple code stream in two
+ forms, e.g. for a partitioned loop:
+
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6, 1, 68);
+ .data_dep.6 = .UNIQUE (OACC_PRIVATE, .data_dep.6, -1, &w);
+ .data_dep.6 = .UNIQUE (OACC_FORK, .data_dep.6, -1);
+ .data_dep.6 = .UNIQUE (OACC_HEAD_MARK, .data_dep.6);
+
+ or alternatively, OACC_PRIVATE can appear at the top level of a parallel,
+ not as part of a HEAD_MARK sequence:
+
+ .UNIQUE (OACC_PRIVATE, 0, 0, &w);
+
+ For such stand-alone appearances, the 3rd argument is always 0, denoting
+ gang partitioning. */
+
+static gcall *
+lower_oacc_private_marker (omp_context *ctx)
+{
+ if (ctx->oacc_privatization_candidates.length () == 0)
+ return NULL;
+
+ auto_vec<tree, 5> args;
+
+ args.quick_push (build_int_cst (integer_type_node, IFN_UNIQUE_OACC_PRIVATE));
+ args.quick_push (integer_zero_node);
+ args.quick_push (integer_minus_one_node);
+
+ int i;
+ tree decl;
+ FOR_EACH_VEC_ELT (ctx->oacc_privatization_candidates, i, decl)
+ {
+ for (omp_context *thisctx = ctx; thisctx; thisctx = thisctx->outer)
+ {
+ tree inner_decl = maybe_lookup_decl (decl, thisctx);
+ if (inner_decl)
+ {
+ decl = inner_decl;
+ break;
+ }
+ }
+ gcc_checking_assert (decl);
+
+ tree addr = build_fold_addr_expr (decl);
+ args.safe_push (addr);
+ }
+
+ return gimple_build_call_internal_vec (IFN_UNIQUE, args);
+}
+
/* Lower code for an OMP loop directive. */
static void
@@ -10974,6 +11067,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
push_gimplify_context ();
+ oacc_privatization_scan_clause_chain (ctx, gimple_omp_for_clauses (stmt));
+
lower_omp (gimple_omp_for_pre_body_ptr (stmt), ctx);
block = make_node (BLOCK);
@@ -10992,6 +11087,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gbind *inner_bind
= as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
tree vars = gimple_bind_vars (inner_bind);
+ if (is_gimple_omp_oacc (ctx->stmt))
+ oacc_privatization_scan_decl_chain (ctx, vars);
gimple_bind_append_vars (new_stmt, vars);
/* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
keep them on the inner_bind and it's block. */
@@ -11105,6 +11202,11 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lower_omp (gimple_omp_body_ptr (stmt), ctx);
+ gcall *private_marker = NULL;
+ if (is_gimple_omp_oacc (ctx->stmt)
+ && !gimple_seq_empty_p (omp_for_body))
+ private_marker = lower_oacc_private_marker (ctx);
+
/* Lower the header expressions. At this point, we can assume that
the header is of the form:
@@ -11159,7 +11261,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
if (is_gimple_omp_oacc (ctx->stmt)
&& !ctx_in_oacc_kernels_region (ctx))
lower_oacc_head_tail (gimple_location (stmt),
- gimple_omp_for_clauses (stmt),
+ gimple_omp_for_clauses (stmt), private_marker,
&oacc_head, &oacc_tail, ctx);
/* Add OpenACC partitioning and reduction markers just before the loop. */
@@ -13156,8 +13258,14 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
them as a dummy GANG loop. */
tree level = build_int_cst (integer_type_node, GOMP_DIM_GANG);
+ gcall *private_marker = lower_oacc_private_marker (ctx);
+
+ if (private_marker)
+ gimple_call_set_arg (private_marker, 2, level);
+
lower_oacc_reductions (gimple_location (ctx->stmt), clauses, level,
- false, NULL, NULL, &fork_seq, &join_seq, ctx);
+ false, NULL, private_marker, NULL, &fork_seq,
+ &join_seq, ctx);
}
gimple_seq_add_seq (&new_body, fork_seq);
@@ -13399,6 +13507,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
ctx);
break;
case GIMPLE_BIND:
+ if (ctx && is_gimple_omp_oacc (ctx->stmt))
+ {
+ tree vars = gimple_bind_vars (as_a <gbind *> (stmt));
+ oacc_privatization_scan_decl_chain (ctx, vars);
+ }
lower_omp (gimple_bind_body_ptr (as_a <gbind *> (stmt)), ctx);
maybe_remove_omp_member_access_dummy_vars (as_a <gbind *> (stmt));
break;
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index 1612461..080bddd 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "attribs.h"
#include "cfgloop.h"
#include "context.h"
+#include "convert.h"
/* Describe the OpenACC looping structure of a function. The entire
function is held in a 'NULL' loop. */
@@ -1357,7 +1358,9 @@ oacc_loop_xform_head_tail (gcall *from, int level)
= ((enum ifn_unique_kind)
TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));
- if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
+ if (k == IFN_UNIQUE_OACC_FORK
+ || k == IFN_UNIQUE_OACC_JOIN
+ || k == IFN_UNIQUE_OACC_PRIVATE)
*gimple_call_arg_ptr (stmt, 2) = replacement;
else if (k == kind && stmt != from)
break;
@@ -1774,6 +1777,136 @@ default_goacc_reduction (gcall *call)
gsi_replace_with_seq (&gsi, seq, true);
}
+struct var_decl_rewrite_info
+{
+ gimple *stmt;
+ hash_map<tree, tree> *adjusted_vars;
+ bool avoid_pointer_conversion;
+ bool modified;
+};
+
+/* Helper function for execute_oacc_device_lower. Rewrite VAR_DECLs (by
+ themselves or wrapped in various other nodes) according to ADJUSTED_VARS in
+ the var_decl_rewrite_info pointed to via DATA. Used as part of coercing
+ gang-private variables in OpenACC offload regions to reside in GPU shared
+ memory. */
+
+static tree
+oacc_rewrite_var_decl (tree *tp, int *walk_subtrees, void *data)
+{
+ walk_stmt_info *wi = (walk_stmt_info *) data;
+ var_decl_rewrite_info *info = (var_decl_rewrite_info *) wi->info;
+
+ if (TREE_CODE (*tp) == ADDR_EXPR)
+ {
+ tree arg = TREE_OPERAND (*tp, 0);
+ tree *new_arg = info->adjusted_vars->get (arg);
+
+ if (new_arg)
+ {
+ if (info->avoid_pointer_conversion)
+ {
+ *tp = build_fold_addr_expr (*new_arg);
+ info->modified = true;
+ *walk_subtrees = 0;
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_for_stmt (info->stmt);
+ tree repl = build_fold_addr_expr (*new_arg);
+ gimple *stmt1
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (repl)), repl);
+ tree conv = convert_to_pointer (TREE_TYPE (*tp),
+ gimple_assign_lhs (stmt1));
+ gimple *stmt2
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (*tp)), conv);
+ gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
+ gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
+ *tp = gimple_assign_lhs (stmt2);
+ info->modified = true;
+ *walk_subtrees = 0;
+ }
+ }
+ }
+ else if (TREE_CODE (*tp) == COMPONENT_REF || TREE_CODE (*tp) == ARRAY_REF)
+ {
+ tree *base = &TREE_OPERAND (*tp, 0);
+
+ while (TREE_CODE (*base) == COMPONENT_REF
+ || TREE_CODE (*base) == ARRAY_REF)
+ base = &TREE_OPERAND (*base, 0);
+
+ if (TREE_CODE (*base) != VAR_DECL)
+ return NULL;
+
+ tree *new_decl = info->adjusted_vars->get (*base);
+ if (!new_decl)
+ return NULL;
+
+ int base_quals = TYPE_QUALS (TREE_TYPE (*new_decl));
+ tree field = TREE_OPERAND (*tp, 1);
+
+ /* Adjust the type of the field. */
+ int field_quals = TYPE_QUALS (TREE_TYPE (field));
+ if (TREE_CODE (field) == FIELD_DECL && field_quals != base_quals)
+ {
+ tree *field_type = &TREE_TYPE (field);
+ while (TREE_CODE (*field_type) == ARRAY_TYPE)
+ field_type = &TREE_TYPE (*field_type);
+ field_quals |= base_quals;
+ *field_type = build_qualified_type (*field_type, field_quals);
+ }
+
+ /* Adjust the type of the component ref itself. */
+ tree comp_type = TREE_TYPE (*tp);
+ int comp_quals = TYPE_QUALS (comp_type);
+ if (TREE_CODE (*tp) == COMPONENT_REF && comp_quals != base_quals)
+ {
+ comp_quals |= base_quals;
+ TREE_TYPE (*tp)
+ = build_qualified_type (comp_type, comp_quals);
+ }
+
+ *base = *new_decl;
+ info->modified = true;
+ }
+ else if (TREE_CODE (*tp) == VAR_DECL)
+ {
+ tree *new_decl = info->adjusted_vars->get (*tp);
+ if (new_decl)
+ {
+ *tp = *new_decl;
+ info->modified = true;
+ }
+ }
+
+ return NULL_TREE;
+}
+
+/* Return TRUE if CALL is a call to a builtin atomic/sync operation. */
+
+static bool
+is_sync_builtin_call (gcall *call)
+{
+ tree callee = gimple_call_fndecl (call);
+
+ if (callee != NULL_TREE
+ && gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+ switch (DECL_FUNCTION_CODE (callee))
+ {
+#undef DEF_SYNC_BUILTIN
+#define DEF_SYNC_BUILTIN(ENUM, NAME, TYPE, ATTRS) case ENUM:
+#include "sync-builtins.def"
+#undef DEF_SYNC_BUILTIN
+ return true;
+
+ default:
+ ;
+ }
+
+ return false;
+}
+
/* Main entry point for oacc transformations which run on the device
compiler after LTO, so we know what the target device is at this
point (including the host fallback). */
@@ -1923,6 +2056,8 @@ execute_oacc_device_lower ()
dominance information to update SSA. */
calculate_dominance_info (CDI_DOMINATORS);
+ hash_map<tree, tree> adjusted_vars;
+
/* Now lower internal loop functions to target-specific code
sequences. */
basic_block bb;
@@ -1999,6 +2134,45 @@ execute_oacc_device_lower ()
case IFN_UNIQUE_OACC_TAIL_MARK:
remove = true;
break;
+
+ case IFN_UNIQUE_OACC_PRIVATE:
+ {
+ HOST_WIDE_INT level
+ = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+ if (level == -1)
+ break;
+ for (unsigned i = 3;
+ i < gimple_call_num_args (call);
+ i++)
+ {
+ tree arg = gimple_call_arg (call, i);
+ gcc_checking_assert (TREE_CODE (arg) == ADDR_EXPR);
+ tree decl = TREE_OPERAND (arg, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ static char const *const axes[] =
+ /* Must be kept in sync with GOMP_DIM
+ enumeration. */
+ { "gang", "worker", "vector" };
+ fprintf (dump_file, "Decl UID %u has %s "
+ "partitioning:", DECL_UID (decl),
+ axes[level]);
+ print_generic_decl (dump_file, decl, TDF_SLIM);
+ fputc ('\n', dump_file);
+ }
+ if (targetm.goacc.adjust_private_decl)
+ {
+ tree oldtype = TREE_TYPE (decl);
+ tree newdecl
+ = targetm.goacc.adjust_private_decl (decl, level);
+ if (TREE_TYPE (newdecl) != oldtype
+ || newdecl != decl)
+ adjusted_vars.put (decl, newdecl);
+ }
+ }
+ remove = true;
+ }
+ break;
}
break;
}
@@ -2030,6 +2204,55 @@ execute_oacc_device_lower ()
gsi_next (&gsi);
}
+ /* Make adjustments to gang-private local variables if required by the
+ target, e.g. forcing them into a particular address space. Afterwards,
+ ADDR_EXPR nodes which have adjusted variables as their argument need to
+ be modified in one of two ways:
+
+ 1. They can be recreated, making a pointer to the variable in the new
+ address space, or
+
+ 2. The address of the variable in the new address space can be taken,
+ converted to the default (original) address space, and the result of
+ that conversion subsituted in place of the original ADDR_EXPR node.
+
+ Which of these is done depends on the gimple statement being processed.
+ At present atomic operations and inline asms use (1), and everything else
+ uses (2). At least on AMD GCN, there are atomic operations that work
+ directly in the LDS address space.
+
+ COMPONENT_REFS, ARRAY_REFS and plain VAR_DECLs are also rewritten to use
+ the new decl, adjusting types of appropriate tree nodes as necessary. */
+
+ if (targetm.goacc.adjust_private_decl)
+ {
+ FOR_ALL_BB_FN (bb, cfun)
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ walk_stmt_info wi;
+ var_decl_rewrite_info info;
+
+ info.avoid_pointer_conversion
+ = (is_gimple_call (stmt)
+ && is_sync_builtin_call (as_a <gcall *> (stmt)))
+ || gimple_code (stmt) == GIMPLE_ASM;
+ info.stmt = stmt;
+ info.modified = false;
+ info.adjusted_vars = &adjusted_vars;
+
+ memset (&wi, 0, sizeof (wi));
+ wi.info = &info;
+
+ walk_gimple_op (stmt, oacc_rewrite_var_decl, &wi);
+
+ if (info.modified)
+ update_stmt (stmt);
+ }
+ }
+
free_oacc_loop (loops);
return 0;
diff --git a/gcc/target.def b/gcc/target.def
index bbaf6b4..660b69f 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1726,6 +1726,35 @@ for allocating any storage for reductions when necessary.",
void, (gcall *call),
default_goacc_reduction)
+DEFHOOK
+(adjust_private_decl,
+"This hook, if defined, is used by accelerator target back-ends to adjust\n\
+OpenACC variable declarations that should be made private to the given\n\
+parallelism level (i.e. @code{GOMP_DIM_GANG}, @code{GOMP_DIM_WORKER} or\n\
+@code{GOMP_DIM_VECTOR}). A typical use for this hook is to force variable\n\
+declarations at the @code{gang} level to reside in GPU shared memory.\n\
+\n\
+You may also use the @code{TARGET_GOACC_EXPAND_VAR_DECL} hook if the\n\
+adjusted variable declaration needs to be expanded to RTL in a non-standard\n\
+way.",
+tree, (tree var, int level),
+NULL)
+
+DEFHOOK
+(expand_var_decl,
+"This hook, if defined, is used by accelerator target back-ends to expand\n\
+specially handled kinds of @code{VAR_DECL} expressions. A particular use is\n\
+to place variables with specific attributes inside special accelarator\n\
+memories. A return value of @code{NULL} indicates that the target does not\n\
+handle this @code{VAR_DECL}, and normal RTL expanding is resumed.\n\
+\n\
+Only define this hook if your accelerator target needs to expand certain\n\
+@code{VAR_DECL} nodes in a way that differs from the default. You can also adjust\n\
+private variables at OpenACC device-lowering time using the\n\
+@code{TARGET_GOACC_ADJUST_PRIVATE_DECL} target hook.",
+rtx, (tree var),
+NULL)
+
HOOK_VECTOR_END (goacc)
/* Functions relating to vectorization. */