Diffstat (limited to 'gcc/omp-offload.cc')
 -rw-r--r--  gcc/omp-offload.cc | 652
 1 file changed, 591 insertions(+), 61 deletions(-)
diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc
index da2b54b..3218f69 100644
--- a/gcc/omp-offload.cc
+++ b/gcc/omp-offload.cc
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see
#include "stringpool.h"
#include "attribs.h"
#include "cfgloop.h"
+#include "cfghooks.h"
#include "context.h"
#include "convert.h"
#include "opts.h"
@@ -391,6 +392,268 @@ omp_discover_implicit_declare_target (void)
lang_hooks.decls.omp_finish_decl_inits ();
}
+/* Return true if CLAUSES contains only clauses that are supported in
+   OMPACC mode.  */
+
+static bool
+ompacc_supported_clauses_p (tree clauses)
+{
+ for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_COLLAPSE:
+ case OMP_CLAUSE_NOWAIT:
+ continue;
+ default:
+ return false;
+ }
+ return true;
+}
+
+/* Information gathered while scanning an OpenMP target region for
+   OMPACC suitability.  */
+
+struct target_region_data
+{
+ tree func_decl;
+ bool has_omp_for;
+ bool has_omp_parallel;
+ bool ompacc_invalid;
+ auto_vec<const char *> warning_msgs;
+ auto_vec<location_t> warning_locs;
+ target_region_data (void)
+ : func_decl (NULL_TREE),
+ has_omp_for (false), has_omp_parallel (false), ompacc_invalid (false),
+ warning_msgs (), warning_locs () {}
+};
+
+static tree scan_omp_target_region_r (tree *, int *, void *);
+
+/* Scan the body of function DECL for constructs unsupported in OMPACC
+   mode, merging the results into TGTDATA if it is non-NULL.  If DECL
+   qualifies, attach the "ompacc" attribute to it (plus "ompacc seq" if
+   no parallelism was found).  */
+
+static void
+scan_fndecl_for_ompacc (tree decl, target_region_data *tgtdata)
+{
+ target_region_data td;
+ td.func_decl = decl;
+ walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
+ scan_omp_target_region_r, &td);
+ tree v;
+ if ((v = lookup_attribute ("omp declare variant base",
+ DECL_ATTRIBUTES (decl)))
+ || (v = lookup_attribute ("omp declare variant variant",
+ DECL_ATTRIBUTES (decl))))
+ {
+ td.ompacc_invalid = true;
+ td.warning_msgs.safe_push ("declare variant not supported for OMPACC");
+ td.warning_locs.safe_push (EXPR_LOCATION (v));
+ }
+ if (tgtdata)
+ {
+ tgtdata->has_omp_for |= td.has_omp_for;
+ tgtdata->has_omp_parallel |= td.has_omp_parallel;
+ tgtdata->ompacc_invalid |= td.ompacc_invalid;
+ for (unsigned i = 0; i < td.warning_msgs.length (); i++)
+ tgtdata->warning_msgs.safe_push (td.warning_msgs[i]);
+ for (unsigned i = 0; i < td.warning_locs.length (); i++)
+ tgtdata->warning_locs.safe_push (td.warning_locs[i]);
+ }
+
+ if (!td.ompacc_invalid
+ && !lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)))
+ {
+ DECL_ATTRIBUTES (decl)
+ = tree_cons (get_identifier ("ompacc"), NULL_TREE,
+ DECL_ATTRIBUTES (decl));
+ if (!td.has_omp_parallel)
+ DECL_ATTRIBUTES (decl)
+ = tree_cons (get_identifier ("ompacc seq"), NULL_TREE,
+ DECL_ATTRIBUTES (decl));
+ }
+}
+
+/* Callback for walk_tree: scan the body of a target region for OpenMP
+   constructs, clauses and function references that OMPACC mode cannot
+   handle.  DATA points to the target_region_data being filled in.  */
+
+static tree
+scan_omp_target_region_r (tree *tp, int *walk_subtrees, void *data)
+{
+ target_region_data *tgtdata = (target_region_data *) data;
+
+ if (TREE_CODE (*tp) == FUNCTION_DECL
+ && !(fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_THREAD_NUM)
+ || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_NUM_THREADS)
+ || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_TEAM_NUM)
+ || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_NUM_TEAMS)
+ || id_equal (DECL_NAME (*tp), "omp_get_thread_num")
+ || id_equal (DECL_NAME (*tp), "omp_get_num_threads")
+ || id_equal (DECL_NAME (*tp), "omp_get_team_num")
+ || id_equal (DECL_NAME (*tp), "omp_get_num_teams"))
+ && *tp != tgtdata->func_decl)
+ {
+ tree decl = *tp;
+ symtab_node *node = symtab_node::get (*tp);
+ if (node)
+ {
+ node = node->ultimate_alias_target ();
+ decl = node->decl;
+ }
+
+ if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
+ {
+ scan_fndecl_for_ompacc (decl, tgtdata);
+ }
+ else
+ {
+ tgtdata->warning_msgs.safe_push ("referencing external function");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ tgtdata->ompacc_invalid = true;
+ }
+ *walk_subtrees = 0;
+ return NULL_TREE;
+ }
+
+ switch (TREE_CODE (*tp))
+ {
+ case OMP_FOR:
+ if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+ {
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("clauses not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ }
+ else if (OMP_FOR_NON_RECTANGULAR (*tp))
+ {
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("non-rectangular loops not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ }
+ else
+ tgtdata->has_omp_for = true;
+ break;
+
+ case OMP_PARALLEL:
+ if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+ {
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("clauses not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ }
+ else
+ tgtdata->has_omp_parallel = true;
+ break;
+
+ case OMP_DISTRIBUTE:
+ case OMP_TEAMS:
+ if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+ {
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("clauses not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ }
+ /* Fallthru. */
+
+ case OMP_ATOMIC:
+ case OMP_ATOMIC_READ:
+ case OMP_ATOMIC_CAPTURE_OLD:
+ case OMP_ATOMIC_CAPTURE_NEW:
+ break;
+
+ case OMP_SIMD:
+ case OMP_TASK:
+ case OMP_LOOP:
+ case OMP_TASKLOOP:
+ case OMP_TASKGROUP:
+ case OMP_SECTION:
+ case OMP_MASTER:
+ case OMP_MASKED:
+ case OMP_ORDERED:
+ case OMP_CRITICAL:
+ case OMP_SCAN:
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("construct not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ *walk_subtrees = 0;
+ break;
+
+ case OMP_TARGET:
+ tgtdata->ompacc_invalid = true;
+ tgtdata->warning_msgs.safe_push ("nested target/reverse offload "
+ "not supported");
+ tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+ *walk_subtrees = 0;
+ break;
+
+ default:
+ break;
+ }
+ return NULL_TREE;
+}
+
+/* Callback for walk_tree: find OMP_TARGET constructs and, when the
+   region qualifies for OMPACC mode, prepend the internal
+   OMP_CLAUSE__OMPACC_ clause; otherwise emit the collected warnings.
+   DATA is the containing function's decl.  */
+
+static tree
+scan_omp_target_construct_r (tree *tp, int *walk_subtrees, void *data)
+{
+ if (TREE_CODE (*tp) == OMP_TARGET)
+ {
+ target_region_data td;
+ td.func_decl = (tree) data;
+ walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
+ scan_omp_target_region_r, &td);
+ for (tree c = OMP_TARGET_CLAUSES (*tp); c; c = OMP_CLAUSE_CHAIN (c))
+ {
+ switch (OMP_CLAUSE_CODE (c))
+ {
+ case OMP_CLAUSE_MAP:
+ continue;
+ default:
+ td.ompacc_invalid = true;
+ td.warning_msgs.safe_push ("clause not supported");
+ td.warning_locs.safe_push (EXPR_LOCATION (c));
+ break;
+ }
+ break;
+ }
+ if (!td.ompacc_invalid)
+ {
+ tree c = build_omp_clause (EXPR_LOCATION (*tp), OMP_CLAUSE__OMPACC_);
+ if (!td.has_omp_parallel)
+ OMP_CLAUSE__OMPACC__SEQ (c) = 1;
+ OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (*tp);
+ OMP_TARGET_CLAUSES (*tp) = c;
+ }
+ else
+ {
+ warning_at (EXPR_LOCATION (*tp), 0,
+ "target region not suitable for OMPACC mode");
+ for (unsigned i = 0; i < td.warning_locs.length (); i++)
+ warning_at (td.warning_locs[i], 0, "%s", td.warning_msgs[i]);
+ }
+ *walk_subtrees = 0;
+ }
+ return NULL_TREE;
+}
+
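As a hypothetical source-level illustration (not taken from the patch) of the line these scanners draw: the first region below uses only a map clause on the target and no unsupported clauses inside, so it would receive the internal _ompacc_ clause; the second contains an OMP_SIMD construct, which is in the unsupported list, so the whole region is rejected with "construct not supported".

void
f (int *a, int *b, int n)
{
  #pragma omp target map(tofrom: a[0:n])
  #pragma omp teams distribute parallel for
  for (int i = 0; i < n; i++)
    a[i] += 1;

  #pragma omp target map(tofrom: b[0:n])
  {
    #pragma omp simd   /* rejected: OMP_SIMD is unsupported */
    for (int i = 0; i < n; i++)
      b[i] += 1;
  }
}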
+/* Scan all defined functions for OpenMP target regions and nested
+   declare-target functions, and tag those suitable for OMPACC mode.  */
+
+void
+omp_ompacc_attribute_tagging (void)
+{
+ cgraph_node *node;
+ FOR_EACH_DEFINED_FUNCTION (node)
+ if (DECL_SAVED_TREE (node->decl))
+ {
+ if (DECL_STRUCT_FUNCTION (node->decl)
+ && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
+ walk_tree_without_duplicates (&DECL_SAVED_TREE (node->decl),
+ scan_omp_target_construct_r,
+ node->decl);
+
+ for (cgraph_node *cgn = first_nested_function (node);
+ cgn; cgn = next_nested_function (cgn))
+ if (omp_declare_target_fn_p (cgn->decl))
+ {
+ scan_fndecl_for_ompacc (cgn->decl, NULL);
+
+ if (lookup_attribute ("ompacc", DECL_ATTRIBUTES (cgn->decl))
+ && !lookup_attribute ("noinline", DECL_ATTRIBUTES (cgn->decl)))
+ {
+ DECL_ATTRIBUTES (cgn->decl)
+ = tree_cons (get_identifier ("noinline"),
+ NULL, DECL_ATTRIBUTES (cgn->decl));
+ DECL_ATTRIBUTES (cgn->decl)
+ = tree_cons (get_identifier ("noipa"),
+ NULL, DECL_ATTRIBUTES (cgn->decl));
+ }
+ }
+ }
+}
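In source-level terms the tagging is roughly equivalent to the following sketch. "helper" is a hypothetical nested declare-target function with no parallel region inside; the "ompacc" and "ompacc seq" attributes are internal and have no user-level spelling, so only the inlining-related attributes can be shown.

/* Sketch of the effect of omp_ompacc_attribute_tagging on a qualifying
   nested declare-target helper: internally marked "ompacc" and
   "ompacc seq", and made opaque to the inliner and to IPA.  */
__attribute__((noinline, noipa))
static void
helper (int *p)
{
  *p += 1;
}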
/* Create new symbols containing (address, size) pairs for global variables,
marked with "omp declare target" attribute, as well as addresses for the
@@ -509,6 +772,22 @@ omp_finish_file (void)
static tree
oacc_dim_call (bool pos, int dim, gimple_seq *seq)
{
+ if (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC)
+ {
+ enum built_in_function fn;
+ if (dim == GOMP_DIM_VECTOR)
+ fn = pos ? BUILT_IN_OMP_GET_THREAD_NUM : BUILT_IN_OMP_GET_NUM_THREADS;
+ else if (dim == GOMP_DIM_GANG)
+ fn = pos ? BUILT_IN_OMP_GET_TEAM_NUM : BUILT_IN_OMP_GET_NUM_TEAMS;
+ else
+ gcc_unreachable ();
+ tree size = create_tmp_var (integer_type_node);
+ gimple *call = gimple_build_call (builtin_decl_explicit (fn), 0);
+ gimple_call_set_lhs (call, size);
+ gimple_seq_add_stmt (seq, call);
+ return size;
+ }
+
tree arg = build_int_cst (unsigned_type_node, dim);
tree size = create_tmp_var (integer_type_node);
enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
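Under OMPACC mode the OpenACC dimension queries are thus answered by the OpenMP runtime. A sketch of the mapping, using the public omp.h entry points that correspond to the builtins selected above:

#include <omp.h>

/* Conceptual mapping used by oacc_dim_call in OMPACC mode:
   GOMP_DIM_GANG   position/size -> omp_get_team_num/omp_get_num_teams,
   GOMP_DIM_VECTOR position/size -> omp_get_thread_num/omp_get_num_threads;
   any other dimension is unreachable here.  */
int
dim_query (int pos, int gang)
{
  if (gang)
    return pos ? omp_get_team_num () : omp_get_num_teams ();
  return pos ? omp_get_thread_num () : omp_get_num_threads ();
}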
@@ -521,11 +800,13 @@ oacc_dim_call (bool pos, int dim, gimple_seq *seq)
}
/* Find the number of threads (POS = false), or thread number (POS =
- true) for an OpenACC region partitioned as MASK. Setup code
+ true) for an OpenACC region partitioned as MASK. If VF_BY_VECTORIZER is
+ non-NULL, use it as the vectorization factor for the auto-vectorized
+ dimension's size, instead of calling the builtin function. Setup code
required for the calculation is added to SEQ. */
static tree
-oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
+oacc_thread_numbers (bool pos, int mask, tree vf_by_vectorizer, gimple_seq *seq)
{
tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
unsigned ix;
@@ -538,13 +819,15 @@ oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
/* We had an outer index, so scale that by the size of
this dimension. */
- tree n = oacc_dim_call (false, ix, seq);
+ tree n = (ix == GOMP_DIM_VECTOR && vf_by_vectorizer)
+ ? vf_by_vectorizer : oacc_dim_call (false, ix, seq);
res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
}
if (pos)
{
/* Determine index in this dimension. */
- tree id = oacc_dim_call (true, ix, seq);
+ tree id = (ix == GOMP_DIM_VECTOR && vf_by_vectorizer)
+ ? integer_zero_node : oacc_dim_call (true, ix, seq);
if (res)
res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
else
@@ -558,6 +841,12 @@ oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
return res;
}
+/* As above, but with no vectorization-factor override.  */
+
+static tree
+oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
+{
+ return oacc_thread_numbers (pos, mask, NULL_TREE, seq);
+}
+
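A worked analog of the linearization with hypothetical geometry: positions accumulate outermost-first, each outer index scaled by the sizes of the inner dimensions. With VF_BY_VECTORIZER set, the vector size is replaced by the vectorization factor and the vector position by zero.

#include <stdio.h>

/* oacc_thread_numbers over a GANG | VECTOR mask computes
   pos = gang_id * vector_size + vector_id and
   volume = num_gangs * vector_size.  Made-up values below.  */
int
main (void)
{
  int num_gangs = 4, vector_size = 32;
  int gang_id = 2, vector_id = 5;
  printf ("pos=%d volume=%d\n",
	  gang_id * vector_size + vector_id,   /* 69 */
	  num_gangs * vector_size);            /* 128 */
  return 0;
}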
/* Transform IFN_GOACC_LOOP calls to actual code. See
expand_oacc_for for where these are generated. At the vector
level, we stride loops, such that each member of a warp will
@@ -585,6 +874,7 @@ oacc_xform_loop (gcall *call)
bool chunking = false, striding = true;
unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
+ tree vf_by_vectorizer = NULL_TREE;
/* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
if (!lhs)
@@ -612,16 +902,39 @@ oacc_xform_loop (gcall *call)
striding = integer_onep (chunk_size);
chunking = !striding;
}
+
+ if (!chunking
+ && !targetm.simt.vf
+ && (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)))
+ {
+ poly_uint64 max_vf = omp_max_vf (false);
+ vf_by_vectorizer = build_int_cst (integer_type_node, max_vf);
+ }
+
#endif
- /* striding=true, chunking=true
+ /* For SIMT targets:
+
+ striding=true, chunking=true
-> invalid.
striding=true, chunking=false
-> chunks=1
striding=false,chunking=true
-> chunks=ceil (range/(chunksize*threads*step))
striding=false,chunking=false
- -> chunk_size=ceil(range/(threads*step)),chunks=1 */
+ -> chunk_size=ceil(range/(threads*step)),chunks=1
+
+ For non-SIMT targets:
+
+ striding=N/A, chunking=true
+ -> as above, for now.
+ striding=N/A, chunking=false
+ -> chunks=1
+ threads=gangs*workers*vf
+ chunk_size=ceil(range/(threads*step))
+ inner chunking loop steps by "step", vf*chunk_size times.
+ */
+
push_gimplify_context (true);
switch (code)
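Since the comment above packs several formulas together, here is a small self-contained check of the non-SIMT "chunking=false" case with made-up numbers (the real gang/worker/vf values come from the launch geometry):

#include <stdio.h>

/* threads counts every abstract lane (gangs * workers * vf),
   chunk_size is the ceiling division of range over threads * step,
   and each group of vf lanes runs one stride-1 inner loop of
   vf * chunk_size iterations.  */
int
main (void)
{
  long range = 1000, step = 1;
  long gangs = 2, workers = 1, vf = 8;
  long threads = gangs * workers * vf;                  /* 16 */
  long chunk_size = (range + threads * step - 1)
		    / (threads * step);                 /* ceil = 63 */
  printf ("threads=%ld chunk_size=%ld inner iterations=%ld\n",
	  threads, chunk_size, vf * chunk_size);        /* 16 63 504 */
  return 0;
}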
@@ -640,49 +953,83 @@ oacc_xform_loop (gcall *call)
chunk_size = fold_convert (type, chunk_size);
per = fold_build2 (MULT_EXPR, type, per, chunk_size);
per = fold_build2 (MULT_EXPR, type, per, step);
- r = build2 (MINUS_EXPR, type, range, dir);
- r = build2 (PLUS_EXPR, type, r, per);
+ r = fold_build2 (MINUS_EXPR, type, range, dir);
+ r = fold_build2 (PLUS_EXPR, type, r, per);
r = build2 (TRUNC_DIV_EXPR, type, r, per);
}
break;
case IFN_GOACC_LOOP_STEP:
{
- /* If striding, step by the entire compute volume, otherwise
- step by the inner volume. */
- unsigned volume = striding ? mask : inner_mask;
+ if (vf_by_vectorizer)
+ r = step;
+ else
+ {
+ /* If striding, step by the entire compute volume, otherwise
+ step by the inner volume. */
+ unsigned volume = striding ? mask : inner_mask;
- r = oacc_thread_numbers (false, volume, &seq);
- r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
+ r = oacc_thread_numbers (false, volume, &seq);
+ r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
+ }
}
break;
case IFN_GOACC_LOOP_OFFSET:
- /* Enable vectorization on non-SIMT targets. */
- if (!targetm.simt.vf
- && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
+ if (vf_by_vectorizer)
+ {
/* If not -fno-tree-loop-vectorize, hint that we want to vectorize
the loop. */
- && (flag_tree_loop_vectorize
- || !OPTION_SET_P (flag_tree_loop_vectorize)))
- {
- basic_block bb = gsi_bb (gsi);
- class loop *parent = bb->loop_father;
- class loop *body = parent->inner;
-
- parent->force_vectorize = true;
- parent->safelen = INT_MAX;
-
- /* "Chunking loops" may have inner loops. */
- if (parent->inner)
+ if (flag_tree_loop_vectorize
+ || !OPTION_SET_P (flag_tree_loop_vectorize))
{
- body->force_vectorize = true;
- body->safelen = INT_MAX;
+ /* Enable vectorization on non-SIMT targets. */
+ basic_block bb = gsi_bb (gsi);
+ class loop *chunk_loop = bb->loop_father;
+ class loop *inner_loop = chunk_loop->inner;
+
+ /* Chunking isn't supported for VF_BY_VECTORIZER loops yet,
+ so we know that the outer chunking loop will be executed just
+ once and the inner loop is the one which must be
+ vectorized (unless it has been optimized out for some
+ reason). */
+ gcc_assert (!chunking);
+
+ if (inner_loop)
+ {
+ inner_loop->force_vectorize = true;
+ inner_loop->safelen = INT_MAX;
+
+ cfun->has_force_vectorize_loops = true;
+ }
}
- cfun->has_force_vectorize_loops = true;
+ /* ...and expand the abstract loops such that the vectorizer can
+ work on them more effectively.
+
+ It might be nicer to merge this code with the "!striding" case
+ below, particularly if chunking support is added. */
+ tree warppos
+ = oacc_thread_numbers (true, mask, vf_by_vectorizer, &seq);
+ warppos = fold_convert (diff_type, warppos);
+
+ tree volume
+ = oacc_thread_numbers (false, mask, vf_by_vectorizer, &seq);
+ volume = fold_convert (diff_type, volume);
+
+ tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+ chunk_size = fold_build2 (PLUS_EXPR, diff_type, range, per);
+ chunk_size = fold_build2 (MINUS_EXPR, diff_type, chunk_size, dir);
+ chunk_size = fold_build2 (TRUNC_DIV_EXPR, diff_type, chunk_size,
+ per);
+
+ warppos = fold_build2 (MULT_EXPR, diff_type, warppos, chunk_size);
+
+ tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
+ chunk = fold_build2 (MULT_EXPR, diff_type, chunk, volume);
+ r = fold_build2 (PLUS_EXPR, diff_type, chunk, warppos);
}
- if (striding)
+ else if (striding)
{
r = oacc_thread_numbers (true, mask, &seq);
r = fold_convert (diff_type, r);
@@ -700,7 +1047,7 @@ oacc_xform_loop (gcall *call)
else
{
tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
-
+ /* chunk_size = (range + per - 1) / per. */
chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
@@ -732,7 +1079,28 @@ oacc_xform_loop (gcall *call)
break;
case IFN_GOACC_LOOP_BOUND:
- if (striding)
+ if (vf_by_vectorizer)
+ {
+ tree volume
+ = oacc_thread_numbers (false, mask, vf_by_vectorizer, &seq);
+ volume = fold_convert (diff_type, volume);
+
+ tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+ chunk_size = fold_build2 (PLUS_EXPR, diff_type, range, per);
+ chunk_size = fold_build2 (MINUS_EXPR, diff_type, chunk_size, dir);
+ chunk_size = fold_build2 (TRUNC_DIV_EXPR, diff_type, chunk_size,
+ per);
+
+ vf_by_vectorizer = fold_convert (diff_type, vf_by_vectorizer);
+ tree vecsize = fold_build2 (MULT_EXPR, diff_type, chunk_size,
+ vf_by_vectorizer);
+ vecsize = fold_build2 (MULT_EXPR, diff_type, vecsize, step);
+ tree vecend = fold_convert (diff_type, gimple_call_arg (call, 6));
+ vecend = fold_build2 (PLUS_EXPR, diff_type, vecend, vecsize);
+ r = fold_build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, diff_type,
+ range, vecend);
+ }
+ else if (striding)
r = range;
else
{
@@ -747,7 +1115,7 @@ oacc_xform_loop (gcall *call)
else
{
tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
-
+ /* chunk_size = (range + per - 1) / per. */
chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
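It is easy to mis-read how the VF_BY_VECTORIZER paths of GOACC_LOOP_OFFSET and GOACC_LOOP_BOUND cooperate; this standalone sketch (hypothetical numbers, chunk argument 0, dir = 1 so MIN_EXPR applies) checks that the per-group ranges tile the iteration space exactly:

#include <stdio.h>

/* Replays the OFFSET/BOUND arithmetic above: each group of vf lanes
   owns chunk_size * vf consecutive iterations, and the final bound is
   clamped to the range.  */
int
main (void)
{
  long range = 1000, step = 1, dir = 1;
  long gangs = 2, vf = 8;
  long volume = gangs * vf;
  long per = volume * step;
  long chunk_size = (range + per - dir) / per;       /* 63 */
  for (long gang = 0; gang < gangs; gang++)
    {
      long offset = (gang * vf) * chunk_size;        /* warppos */
      long vecend = offset + chunk_size * vf * step;
      long bound = vecend < range ? vecend : range;  /* MIN_EXPR */
      printf ("gang %ld: [%ld, %ld)\n", gang, offset, bound);
    }
  return 0;          /* prints [0, 504) and [504, 1000) */
}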
@@ -875,8 +1243,9 @@ oacc_get_min_dim (int dim)
}
/* Parse the default dimension parameter. This is a set of
- :-separated optional compute dimensions. Each specified dimension
- is a positive integer. When device type support is added, it is
+ :-separated optional compute dimensions. Each dimension is either
+ a positive integer, or '-' for a dynamic value computed at
+ runtime. When device type support is added, it is
planned to be a comma separated list of such compute dimensions,
with all but the first prefixed by the colon-terminated device
type. */
@@ -911,14 +1280,20 @@ oacc_parse_default_dims (const char *dims)
if (*pos != ':')
{
- long val;
- const char *eptr;
+ long val = 0;
- errno = 0;
- val = strtol (pos, CONST_CAST (char **, &eptr), 10);
- if (errno || val <= 0 || (int) val != val)
- goto malformed;
- pos = eptr;
+ if (*pos == '-')
+ pos++;
+ else
+ {
+ const char *eptr;
+
+ errno = 0;
+ val = strtol (pos, CONST_CAST (char **, &eptr), 10);
+ if (errno || val <= 0 || (int) val != val)
+ goto malformed;
+ pos = eptr;
+ }
oacc_default_dims[ix] = (int) val;
}
}
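A standalone sketch of the grammar this now accepts (my reconstruction of the surrounding loop, with the dimension count fixed at 3 for the example): an empty field keeps the built-in default, and '-' records 0, meaning the dimension is chosen dynamically at runtime.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse up to 3 colon-separated fields; each is empty, a positive
   integer, or '-' (stored as 0).  Returns -1 if malformed.  */
static int
parse_dims (const char *pos, int dims[3])
{
  for (int ix = 0; ix != 3; ix++)
    {
      if (ix && *pos++ != ':')
	return ix;             /* fewer fields than dimensions */
      if (*pos == ':' || !*pos)
	continue;              /* empty field: keep the default */
      long val = 0;
      if (*pos == '-')
	pos++;                 /* dynamic value: leave val == 0 */
      else
	{
	  char *eptr;
	  errno = 0;
	  val = strtol (pos, &eptr, 10);
	  if (errno || val <= 0 || (int) val != val)
	    return -1;
	  pos = eptr;
	}
      dims[ix] = (int) val;
    }
  return 3;
}

int
main (void)
{
  int dims[3] = { -1, -1, -1 };
  parse_dims ("32:-:128", dims);
  printf ("%d %d %d\n", dims[0], dims[1], dims[2]);  /* 32 0 128 */
  return 0;
}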
@@ -1777,6 +2152,129 @@ default_goacc_fork_join (gcall *ARG_UNUSED (call),
return targetm.have_oacc_join ();
}
+/* Emit code into SEQ copying MAX_IDX + 1 array elements from SRC to DST
+   with a single memcpy call.  One of DST/SRC may have been retyped to
+   (void *); the element type is taken from whichever side still points
+   to an array.  */
+
+void
+oacc_build_array_copy (tree dst, tree src, tree max_idx, gimple_seq *seq)
+{
+ push_gimplify_context (true);
+
+ tree len = fold_build2 (PLUS_EXPR, size_type_node, max_idx, size_int (1));
+ tree ptr_to_array = (TREE_TYPE (dst) == ptr_type_node ? src : dst);
+ tree elem_type;
+ if (TREE_CODE (TREE_TYPE (ptr_to_array)) == POINTER_TYPE
+ && TREE_CODE (TREE_TYPE (TREE_TYPE (ptr_to_array))) == ARRAY_TYPE)
+ elem_type = TREE_TYPE (TREE_TYPE (TREE_TYPE (ptr_to_array)));
+ else
+ elem_type = TREE_TYPE (TREE_TYPE (ptr_to_array));
+ tree elem_size = TYPE_SIZE_UNIT (elem_type);
+ tree size = fold_build2 (MULT_EXPR, size_type_node, len, elem_size);
+
+ tree memcpy_decl = builtin_decl_implicit (BUILT_IN_MEMCPY);
+ tree call = build_call_expr (memcpy_decl, 3, dst, src, size);
+ gimplify_and_add (call, seq);
+ pop_gimplify_context (NULL);
+}
+
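A plain-C analog of the helper's size computation, for a concrete element type (MAX_IDX is the highest valid index, so the element count is MAX_IDX + 1):

#include <string.h>

/* Analog of oacc_build_array_copy: the byte count is
   (max_idx + 1) * sizeof (element).  */
static void
array_copy (double *dst, const double *src, size_t max_idx)
{
  memcpy (dst, src, (max_idx + 1) * sizeof (double));
}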
+/* Emit an element-wise loop at GSI copying MAX_IDX + 1 elements from
+   array SRC to array DST.  */
+
+void
+oacc_build_array_copy_loop (location_t loc, tree dst, tree src, tree max_idx,
+ gimple_stmt_iterator *gsi)
+{
+ push_gimplify_context (true);
+
+ tree loop_index;
+ gimple_stmt_iterator loop_body_gsi;
+ oacc_build_indexed_ssa_loop (loc, max_idx, gsi,
+ &loop_index, &loop_body_gsi);
+ gimple_seq copy_seq = NULL;
+
+ tree dst_array_type = TREE_TYPE (TREE_TYPE (dst));
+ tree dst_elem_type = build_qualified_type (TREE_TYPE (dst_array_type),
+ TYPE_QUALS (dst_array_type));
+ tree dst_elem_ptr_type = build_pointer_type (dst_elem_type);
+ tree dst_ptr = fold_convert (dst_elem_ptr_type, dst);
+
+ tree src_array_type = TREE_TYPE (TREE_TYPE (src));
+ tree src_elem_type = build_qualified_type (TREE_TYPE (src_array_type),
+ TYPE_QUALS (src_array_type));
+ tree src_elem_ptr_type = build_pointer_type (src_elem_type);
+ tree src_ptr = fold_convert (src_elem_ptr_type, src);
+
+ tree offset = build2 (MULT_EXPR, sizetype,
+ loop_index, TYPE_SIZE_UNIT (dst_elem_type));
+
+ dst_ptr = build2 (POINTER_PLUS_EXPR, dst_elem_ptr_type, dst_ptr, offset);
+ src_ptr = build2 (POINTER_PLUS_EXPR, src_elem_ptr_type, src_ptr, offset);
+
+ tree dst_mem_ref = build_simple_mem_ref (dst_ptr);
+ tree src_mem_ref = build_simple_mem_ref (src_ptr);
+
+ gimplify_assign (dst_mem_ref, src_mem_ref, &copy_seq);
+
+ gsi_insert_seq_before (&loop_body_gsi, copy_seq, GSI_SAME_STMT);
+ pop_gimplify_context (NULL);
+}
+
+/* Build a do-while style SSA loop at GSI whose index runs from 0 to
+   MAX_INDEX inclusive.  Return the index SSA name in *OUT_LOOP_INDEX
+   and an insertion point for the loop body code in
+   *OUT_LOOP_BODY_CODE_GSI.  */
+
+void
+oacc_build_indexed_ssa_loop (location_t loc, tree max_index,
+ gimple_stmt_iterator *gsi, tree *out_loop_index,
+ gimple_stmt_iterator *out_loop_body_code_gsi)
+{
+ gimple *g;
+ gimple_seq seq = NULL;
+
+ tree init_index = make_ssa_name (TREE_TYPE (max_index));
+ tree loop_index = make_ssa_name (TREE_TYPE (max_index));
+ tree update_index = make_ssa_name (TREE_TYPE (max_index));
+
+ g = gimple_build_assign (init_index,
+ build_int_cst (TREE_TYPE (init_index), 0));
+ gimple_seq_add_stmt (&seq, g);
+
+ gimple *init_end = gimple_seq_last (seq);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ basic_block init_bb = gsi_bb (*gsi);
+ edge init_edge = split_block (init_bb, init_end);
+ basic_block loop_bb = init_edge->dest;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ seq = NULL;
+ g = gimple_build_assign (update_index, PLUS_EXPR, loop_index,
+ build_int_cst (TREE_TYPE (loop_index), 1));
+ gimple_seq_add_stmt (&seq, g);
+
+ g = gimple_build_cond (LE_EXPR, update_index, max_index, NULL, NULL);
+ gimple_seq_add_stmt (&seq, g);
+ gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+ edge post_edge = split_block (loop_bb, g);
+ basic_block post_bb = post_edge->dest;
+ loop_bb = post_edge->src;
+ /* Reset the iterator. */
+ *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+ /* Return place where we insert loop body code. */
+ gimple_stmt_iterator loop_body_code_gsi = gsi_start_bb (loop_bb);
+
+ post_edge->flags ^= EDGE_FALSE_VALUE | EDGE_FALLTHRU;
+ post_edge->probability = profile_probability::even ();
+ edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
+ loop_edge->probability = profile_probability::even ();
+ set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
+ set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
+ class loop *new_loop = alloc_loop ();
+ new_loop->header = loop_bb;
+ new_loop->latch = loop_bb;
+ add_loop (new_loop, loop_bb->loop_father);
+
+ gphi *phi = create_phi_node (loop_index, loop_bb);
+ add_phi_arg (phi, init_index, init_edge, loc);
+ add_phi_arg (phi, update_index, loop_edge, loc);
+
+ *out_loop_index = loop_index;
+ *out_loop_body_code_gsi = loop_body_code_gsi;
+}
+
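The control flow built here corresponds to a do-while loop; a standalone analog (with a hypothetical body) makes the trip count explicit:

/* Equivalent shape of the loop built by oacc_build_indexed_ssa_loop:
   the body runs for loop_index = 0 .. max_index, i.e. max_index + 1
   times, and the latch test is on the updated index.  */
static void
indexed_loop (unsigned max_index, int *a)
{
  unsigned i = 0;                 /* init_index, in init_bb */
  do
    {
      a[i] += 1;                  /* body, inserted at loop_body_gsi */
      i = i + 1;                  /* update_index */
    }
  while (i <= max_index);         /* LE_EXPR cond; true edge loops */
}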
/* Default goacc.reduction early expander.
LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
@@ -1802,18 +2300,44 @@ default_goacc_reduction (gcall *call)
if there is one. */
tree ref_to_res = gimple_call_arg (call, 1);
+ tree array_addr = gimple_call_arg (call, 6);
+ tree array_max_idx = gimple_call_arg (call, 7);
+
if (!integer_zerop (ref_to_res))
{
- tree dst = build_simple_mem_ref (ref_to_res);
- tree src = var;
-
- if (code == IFN_GOACC_REDUCTION_SETUP)
+ if (!integer_zerop (array_addr))
{
- src = dst;
- dst = lhs;
- lhs = NULL;
+ tree dst, src;
+ if (code == IFN_GOACC_REDUCTION_SETUP)
+ dst = array_addr, src = ref_to_res;
+ else
+ src = array_addr, dst = ref_to_res;
+ oacc_build_array_copy (dst, src, array_max_idx, &seq);
+ }
+ else
+ {
+ /* Dummy reduction vars that have GOMP_MAP_FIRSTPRIVATE_POINTER data
+ mappings get retyped to (void *). Adjust the type of ref_to_res
+ as appropriate. */
+ if (TREE_TYPE (TREE_TYPE (ref_to_res)) != TREE_TYPE (var))
+ {
+ tree ptype = build_pointer_type (TREE_TYPE (var));
+ tree t = make_ssa_name (ptype);
+ tree expr = fold_build1 (NOP_EXPR, ptype, ref_to_res);
+ gimple_seq_add_stmt (&seq, gimple_build_assign (t, expr));
+ ref_to_res = t;
+ }
+ tree dst = build_simple_mem_ref (ref_to_res);
+ tree src = var;
+
+ if (code == IFN_GOACC_REDUCTION_SETUP)
+ {
+ src = dst;
+ dst = lhs;
+ lhs = NULL;
+ }
+ gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
}
- gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
}
}
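The copy direction in the new array branch, restated as a standalone sketch with a hypothetical element type:

#include <string.h>

enum red_kind { SETUP, TEARDOWN };

/* SETUP pulls the shared result buffer into the private array
   (dst = array_addr, src = ref_to_res); the other kinds push the
   private array back (dst = ref_to_res, src = array_addr).  */
static void
reduction_array_copy (enum red_kind kind, double *priv, double *shared,
		      size_t max_idx)
{
  size_t bytes = (max_idx + 1) * sizeof (double);
  if (kind == SETUP)
    memcpy (priv, shared, bytes);
  else
    memcpy (shared, priv, bytes);
}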
@@ -2145,15 +2669,19 @@ execute_oacc_loop_designation ()
static unsigned int
execute_oacc_device_lower ()
{
- tree attrs = oacc_get_fn_attrib (current_function_decl);
+ tree attrs;
+ int dims[GOMP_DIM_MAX];
- if (!attrs)
- /* Not an offloaded function. */
- return 0;
+ if (flag_openacc)
+ {
+ attrs = oacc_get_fn_attrib (current_function_decl);
+ if (!attrs)
+ /* Not an offloaded function. */
+ return 0;
- int dims[GOMP_DIM_MAX];
- for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
- dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+ for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
+ dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+ }
hash_map<tree, tree> adjusted_vars;
@@ -2222,7 +2750,8 @@ execute_oacc_device_lower ()
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN:
- if (integer_minus_onep (gimple_call_arg (call, 2)))
+ if (flag_openacc
+ && integer_minus_onep (gimple_call_arg (call, 2)))
remove = true;
else if (!targetm.goacc.fork_join
(call, dims, kind == IFN_UNIQUE_OACC_FORK))
@@ -2509,7 +3038,8 @@ public:
{}
/* opt_pass methods: */
- bool gate (function *) final override { return flag_openacc; };
+ bool gate (function *) final override
+ {
+   return (flag_openacc
+	   || (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC));
+ };
unsigned int execute (function *) final override
{