Diffstat (limited to 'gcc/omp-offload.cc')
-rw-r--r--  gcc/omp-offload.cc | 652
1 file changed, 591 insertions(+), 61 deletions(-)
diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc
index da2b54b..3218f69 100644
--- a/gcc/omp-offload.cc
+++ b/gcc/omp-offload.cc
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "stringpool.h"
 #include "attribs.h"
 #include "cfgloop.h"
+#include "cfghooks.h"
 #include "context.h"
 #include "convert.h"
 #include "opts.h"
@@ -391,6 +392,268 @@ omp_discover_implicit_declare_target (void)
   lang_hooks.decls.omp_finish_decl_inits ();
 }
 
+static bool ompacc_supported_clauses_p (tree clauses)
+{
+  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    switch (OMP_CLAUSE_CODE (c))
+      {
+      case OMP_CLAUSE_COLLAPSE:
+      case OMP_CLAUSE_NOWAIT:
+        continue;
+      default:
+        return false;
+      }
+  return true;
+}
+
+struct target_region_data
+{
+  tree func_decl;
+  bool has_omp_for;
+  bool has_omp_parallel;
+  bool ompacc_invalid;
+  auto_vec<const char *> warning_msgs;
+  auto_vec<location_t> warning_locs;
+  target_region_data (void)
+    : func_decl (NULL_TREE),
+      has_omp_for (false), has_omp_parallel (false), ompacc_invalid (false),
+      warning_msgs (), warning_locs () {}
+};
+
+static tree scan_omp_target_region_r (tree *, int *, void *);
+
+static void
+scan_fndecl_for_ompacc (tree decl, target_region_data *tgtdata)
+{
+  target_region_data td;
+  td.func_decl = decl;
+  walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
+                                scan_omp_target_region_r, &td);
+  tree v;
+  if ((v = lookup_attribute ("omp declare variant base",
+                             DECL_ATTRIBUTES (decl)))
+      || (v = lookup_attribute ("omp declare variant variant",
+                                DECL_ATTRIBUTES (decl))))
+    {
+      td.ompacc_invalid = true;
+      td.warning_msgs.safe_push ("declare variant not supported for OMPACC");
+      td.warning_locs.safe_push (EXPR_LOCATION (v));
+    }
+  if (tgtdata)
+    {
+      tgtdata->has_omp_for |= td.has_omp_for;
+      tgtdata->has_omp_parallel |= td.has_omp_parallel;
+      tgtdata->ompacc_invalid |= td.ompacc_invalid;
+      for (unsigned i = 0; i < td.warning_msgs.length (); i++)
+        tgtdata->warning_msgs.safe_push (td.warning_msgs[i]);
+      for (unsigned i = 0; i < td.warning_locs.length (); i++)
+        tgtdata->warning_locs.safe_push (td.warning_locs[i]);
+    }
+
+  if (!td.ompacc_invalid
+      && !lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)))
+    {
+      DECL_ATTRIBUTES (decl)
+        = tree_cons (get_identifier ("ompacc"), NULL_TREE,
+                     DECL_ATTRIBUTES (decl));
+      if (!td.has_omp_parallel)
+        DECL_ATTRIBUTES (decl)
+          = tree_cons (get_identifier ("ompacc seq"), NULL_TREE,
+                       DECL_ATTRIBUTES (decl));
+    }
+}
+
+static tree
+scan_omp_target_region_r (tree *tp, int *walk_subtrees, void *data)
+{
+  target_region_data *tgtdata = (target_region_data *) data;
+
+  if (TREE_CODE (*tp) == FUNCTION_DECL
+      && !(fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_THREAD_NUM)
+           || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_NUM_THREADS)
+           || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_TEAM_NUM)
+           || fndecl_built_in_p (*tp, BUILT_IN_OMP_GET_NUM_TEAMS)
+           || id_equal (DECL_NAME (*tp), "omp_get_thread_num")
+           || id_equal (DECL_NAME (*tp), "omp_get_num_threads")
+           || id_equal (DECL_NAME (*tp), "omp_get_team_num")
+           || id_equal (DECL_NAME (*tp), "omp_get_num_teams"))
+      && *tp != tgtdata->func_decl)
+    {
+      tree decl = *tp;
+      symtab_node *node = symtab_node::get (*tp);
+      if (node)
+        {
+          node = node->ultimate_alias_target ();
+          decl = node->decl;
+        }
+
+      if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
+        {
+          scan_fndecl_for_ompacc (decl, tgtdata);
+        }
+      else
+        {
+          tgtdata->warning_msgs.safe_push ("referencing external function");
+          tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+          tgtdata->ompacc_invalid = true;
+        }
+
+      *walk_subtrees = 0;
+      return NULL_TREE;
+    }
+
+  switch (TREE_CODE (*tp))
+    {
+    case OMP_FOR:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+        {
+          tgtdata->ompacc_invalid = true;
+          tgtdata->warning_msgs.safe_push ("clauses not supported");
+          tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+        }
+      else if (OMP_FOR_NON_RECTANGULAR (*tp))
+        {
+          tgtdata->ompacc_invalid = true;
+          tgtdata->warning_msgs.safe_push ("non-rectangular loops not supported");
+          tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+        }
+      else
+        tgtdata->has_omp_for = true;
+      break;
+
+    case OMP_PARALLEL:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+        {
+          tgtdata->ompacc_invalid = true;
+          tgtdata->warning_msgs.safe_push ("clauses not supported");
+          tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+        }
+      else
+        tgtdata->has_omp_parallel = true;
+      break;
+
+    case OMP_DISTRIBUTE:
+    case OMP_TEAMS:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (*tp)))
+        {
+          tgtdata->ompacc_invalid = true;
+          tgtdata->warning_msgs.safe_push ("clauses not supported");
+          tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+        }
+      /* Fallthru.  */
+
+    case OMP_ATOMIC:
+    case OMP_ATOMIC_READ:
+    case OMP_ATOMIC_CAPTURE_OLD:
+    case OMP_ATOMIC_CAPTURE_NEW:
+      break;
+
+    case OMP_SIMD:
+    case OMP_TASK:
+    case OMP_LOOP:
+    case OMP_TASKLOOP:
+    case OMP_TASKGROUP:
+    case OMP_SECTION:
+    case OMP_MASTER:
+    case OMP_MASKED:
+    case OMP_ORDERED:
+    case OMP_CRITICAL:
+    case OMP_SCAN:
+      tgtdata->ompacc_invalid = true;
+      tgtdata->warning_msgs.safe_push ("construct not supported");
+      tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+      *walk_subtrees = 0;
+      break;
+
+    case OMP_TARGET:
+      tgtdata->ompacc_invalid = true;
+      tgtdata->warning_msgs.safe_push ("nested target/reverse offload "
+                                       "not supported");
+      tgtdata->warning_locs.safe_push (EXPR_LOCATION (*tp));
+      *walk_subtrees = 0;
+      break;
+
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
+static tree
+scan_omp_target_construct_r (tree *tp, int *walk_subtrees,
+                             void *data)
+{
+  if (TREE_CODE (*tp) == OMP_TARGET)
+    {
+      target_region_data td;
+      td.func_decl = (tree) data;
+      walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
+                                    scan_omp_target_region_r, &td);
+      for (tree c = OMP_TARGET_CLAUSES (*tp); c; c = OMP_CLAUSE_CHAIN (c))
+        {
+          switch (OMP_CLAUSE_CODE (c))
+            {
+            case OMP_CLAUSE_MAP:
+              continue;
+            default:
+              td.ompacc_invalid = true;
+              td.warning_msgs.safe_push ("clause not supported");
+              td.warning_locs.safe_push (EXPR_LOCATION (c));
+              break;
+            }
+          break;
+        }
+      if (!td.ompacc_invalid)
+        {
+          tree c = build_omp_clause (EXPR_LOCATION (*tp), OMP_CLAUSE__OMPACC_);
+          if (!td.has_omp_parallel)
+            OMP_CLAUSE__OMPACC__SEQ (c) = 1;
+          OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (*tp);
+          OMP_TARGET_CLAUSES (*tp) = c;
+        }
+      else
+        {
+          warning_at (EXPR_LOCATION (*tp), 0, "Target region not suitable for "
+                      "OMPACC mode");
+          for (unsigned i = 0; i < td.warning_locs.length (); i++)
+            warning_at (td.warning_locs[i], 0, td.warning_msgs[i]);
+        }
+      *walk_subtrees = 0;
+    }
+  return NULL_TREE;
+}
+
+void
+omp_ompacc_attribute_tagging (void)
+{
+  cgraph_node *node;
+  FOR_EACH_DEFINED_FUNCTION (node)
+    if (DECL_SAVED_TREE (node->decl))
+      {
+        if (DECL_STRUCT_FUNCTION (node->decl)
+            && DECL_STRUCT_FUNCTION (node->decl)->has_omp_target)
+          walk_tree_without_duplicates (&DECL_SAVED_TREE (node->decl),
+                                        scan_omp_target_construct_r,
+                                        node->decl);
+
+        for (cgraph_node *cgn = first_nested_function (node);
+             cgn; cgn = next_nested_function (cgn))
+          if (omp_declare_target_fn_p (cgn->decl))
+            {
+              scan_fndecl_for_ompacc (cgn->decl, NULL);
+
+              if (lookup_attribute ("ompacc", DECL_ATTRIBUTES (cgn->decl))
+                  && !lookup_attribute ("noinline", DECL_ATTRIBUTES (cgn->decl)))
+                {
+                  DECL_ATTRIBUTES (cgn->decl)
+                    = tree_cons (get_identifier ("noinline"),
+                                 NULL, DECL_ATTRIBUTES (cgn->decl));
+                  DECL_ATTRIBUTES (cgn->decl)
+                    = tree_cons (get_identifier ("noipa"),
+                                 NULL, DECL_ATTRIBUTES (cgn->decl));
+                }
+            }
+      }
+}
+
 /* Create new symbols containing (address, size) pairs for global variables,
    marked with "omp declare target" attribute, as well as addresses for the
@@ -509,6 +772,22 @@ omp_finish_file (void)
 static tree
 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
 {
+  if (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC)
+    {
+      enum built_in_function fn;
+      if (dim == GOMP_DIM_VECTOR)
+        fn = pos ? BUILT_IN_OMP_GET_THREAD_NUM : BUILT_IN_OMP_GET_NUM_THREADS;
+      else if (dim == GOMP_DIM_GANG)
+        fn = pos ? BUILT_IN_OMP_GET_TEAM_NUM : BUILT_IN_OMP_GET_NUM_TEAMS;
+      else
+        gcc_unreachable ();
+      tree size = create_tmp_var (integer_type_node);
+      gimple *call = gimple_build_call (builtin_decl_explicit (fn), 0);
+      gimple_call_set_lhs (call, size);
+      gimple_seq_add_stmt (seq, call);
+      return size;
+    }
+
   tree arg = build_int_cst (unsigned_type_node, dim);
   tree size = create_tmp_var (integer_type_node);
   enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
@@ -521,11 +800,13 @@ oacc_dim_call (bool pos, int dim, gimple_seq *seq)
 }
 
 /* Find the number of threads (POS = false), or thread number (POS =
-   true) for an OpenACC region partitioned as MASK.  Setup code
+   true) for an OpenACC region partitioned as MASK.  If VF_BY_VECTORIZER is
+   true, use that as the vectorization factor for the auto-vectorized
+   dimension size, instead of calling the builtin function.  Setup code
    required for the calculation is added to SEQ.  */
 
 static tree
-oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
+oacc_thread_numbers (bool pos, int mask, tree vf_by_vectorizer, gimple_seq *seq)
 {
   tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
   unsigned ix;
@@ -538,13 +819,15 @@ oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
         {
           /* We had an outer index, so scale that by the size of this
              dimension.  */
-          tree n = oacc_dim_call (false, ix, seq);
+          tree n = (ix == GOMP_DIM_VECTOR && vf_by_vectorizer)
+                   ? vf_by_vectorizer : oacc_dim_call (false, ix, seq);
           res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
         }
       if (pos)
        	{
          /* Determine index in this dimension.  */
-          tree id = oacc_dim_call (true, ix, seq);
+          tree id = (ix == GOMP_DIM_VECTOR && vf_by_vectorizer)
+                    ? integer_zero_node : oacc_dim_call (true, ix, seq);
          if (res)
            res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
          else
@@ -558,6 +841,12 @@ oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
   return res;
 }
 
+static tree
+oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
+{
+  return oacc_thread_numbers (pos, mask, NULL_TREE, seq);
+}
+
 /* Transform IFN_GOACC_LOOP calls to actual code.  See
    expand_oacc_for for where these are generated.  At the vector
    level, we stride loops, such that each member of a warp will
@@ -585,6 +874,7 @@ oacc_xform_loop (gcall *call)
   bool chunking = false, striding = true;
   unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
   unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
+  tree vf_by_vectorizer = NULL_TREE;
 
   /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
   if (!lhs)
@@ -612,16 +902,39 @@ oacc_xform_loop (gcall *call)
       striding = integer_onep (chunk_size);
      	chunking = !striding;
     }
+
+  if (!chunking
+      && !targetm.simt.vf
+      && (mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR)))
+    {
+      poly_uint64 max_vf = omp_max_vf (false);
+      vf_by_vectorizer = build_int_cst (integer_type_node, max_vf);
+    }
+
 #endif
 
-  /* striding=true, chunking=true
+  /* For SIMT targets:
+
+     striding=true, chunking=true
        -> invalid.
      striding=true, chunking=false
       	-> chunks=1
     	striding=false,chunking=true
      	-> chunks=ceil (range/(chunksize*threads*step))
     	striding=false,chunking=false
-       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
+       -> chunk_size=ceil(range/(threads*step)),chunks=1
+
+     For non-SIMT targets:
+
+     striding=N/A, chunking=true
+       -> as above, for now.
+     striding=N/A, chunking=false
+       -> chunks=1
+          threads=gangs*workers*vf
+          chunk_size=ceil(range/(threads*step))
+          inner chunking loop steps by "step", vf*chunk_size times.
+  */
+
   push_gimplify_context (true);
 
   switch (code)
@@ -640,49 +953,83 @@ oacc_xform_loop (gcall *call)
        	chunk_size = fold_convert (type, chunk_size);
       	per = fold_build2 (MULT_EXPR, type, per, chunk_size);
       	per = fold_build2 (MULT_EXPR, type, per, step);
-       r = build2 (MINUS_EXPR, type, range, dir);
-       r = build2 (PLUS_EXPR, type, r, per);
+       r = fold_build2 (MINUS_EXPR, type, range, dir);
+       r = fold_build2 (PLUS_EXPR, type, r, per);
       	r = build2 (TRUNC_DIV_EXPR, type, r, per);
       }
       break;
 
     case IFN_GOACC_LOOP_STEP:
       {
-       /* If striding, step by the entire compute volume, otherwise
-          step by the inner volume.  */
-       unsigned volume = striding ? mask : inner_mask;
+       if (vf_by_vectorizer)
+         r = step;
+       else
+         {
+           /* If striding, step by the entire compute volume, otherwise
+              step by the inner volume.  */
+           unsigned volume = striding ? mask : inner_mask;
 
-       r = oacc_thread_numbers (false, volume, &seq);
-       r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
+           r = oacc_thread_numbers (false, volume, &seq);
+           r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
+         }
      	}
      	break;
 
     case IFN_GOACC_LOOP_OFFSET:
-      /* Enable vectorization on non-SIMT targets.  */
-      if (!targetm.simt.vf
-         && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
+      if (vf_by_vectorizer)
+       {
          /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
             the loop.  */
-         && (flag_tree_loop_vectorize
-             || !OPTION_SET_P (flag_tree_loop_vectorize)))
-       {
-         basic_block bb = gsi_bb (gsi);
-         class loop *parent = bb->loop_father;
-         class loop *body = parent->inner;
-
-         parent->force_vectorize = true;
-         parent->safelen = INT_MAX;
-
-         /* "Chunking loops" may have inner loops.  */
-         if (parent->inner)
+         if (flag_tree_loop_vectorize
+             || !OPTION_SET_P (flag_tree_loop_vectorize))
           	{
-             body->force_vectorize = true;
-             body->safelen = INT_MAX;
+             /* Enable vectorization on non-SIMT targets.  */
+             basic_block bb = gsi_bb (gsi);
+             class loop *chunk_loop = bb->loop_father;
+             class loop *inner_loop = chunk_loop->inner;
+
+             /* Chunking isn't supported for VF_BY_VECTORIZER loops yet,
+                so we know that the outer chunking loop will be executed just
+                once and the inner loop is the one which must be
+                vectorized (unless it has been optimized out for some
+                reason).  */
+             gcc_assert (!chunking);
+
+             if (inner_loop)
+               {
+                 inner_loop->force_vectorize = true;
+                 inner_loop->safelen = INT_MAX;
+
+                 cfun->has_force_vectorize_loops = true;
+               }
            	}
-         cfun->has_force_vectorize_loops = true;
+         /* ...and expand the abstract loops such that the vectorizer can
+            work on them more effectively.
+
+            It might be nicer to merge this code with the "!striding" case
+            below, particularly if chunking support is added.  */
+         tree warppos
+           = oacc_thread_numbers (true, mask, vf_by_vectorizer, &seq);
+         warppos = fold_convert (diff_type, warppos);
+
+         tree volume
+           = oacc_thread_numbers (false, mask, vf_by_vectorizer, &seq);
+         volume = fold_convert (diff_type, volume);
+
+         tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+         chunk_size = fold_build2 (PLUS_EXPR, diff_type, range, per);
+         chunk_size = fold_build2 (MINUS_EXPR, diff_type, chunk_size, dir);
+         chunk_size = fold_build2 (TRUNC_DIV_EXPR, diff_type, chunk_size,
+                                   per);
+
+         warppos = fold_build2 (MULT_EXPR, diff_type, warppos, chunk_size);
+
+         tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
+         chunk = fold_build2 (MULT_EXPR, diff_type, chunk, volume);
+         r = fold_build2 (PLUS_EXPR, diff_type, chunk, warppos);
        	}
-      if (striding)
+      else if (striding)
        	{
          r = oacc_thread_numbers (true, mask, &seq);
          r = fold_convert (diff_type, r);
@@ -700,7 +1047,7 @@ oacc_xform_loop (gcall *call)
          else
            	{
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
-
+             /* chunk_size = (range + per - 1) / per.  */
              chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
              chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
              chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
@@ -732,7 +1079,28 @@ oacc_xform_loop (gcall *call)
      	break;
 
     case IFN_GOACC_LOOP_BOUND:
-      if (striding)
+      if (vf_by_vectorizer)
+       {
+         tree volume
+           = oacc_thread_numbers (false, mask, vf_by_vectorizer, &seq);
+         volume = fold_convert (diff_type, volume);
+
+         tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
+         chunk_size = fold_build2 (PLUS_EXPR, diff_type, range, per);
+         chunk_size = fold_build2 (MINUS_EXPR, diff_type, chunk_size, dir);
+         chunk_size = fold_build2 (TRUNC_DIV_EXPR, diff_type, chunk_size,
+                                   per);
+
+         vf_by_vectorizer = fold_convert (diff_type, vf_by_vectorizer);
+         tree vecsize = fold_build2 (MULT_EXPR, diff_type, chunk_size,
+                                     vf_by_vectorizer);
+         vecsize = fold_build2 (MULT_EXPR, diff_type, vecsize, step);
+         tree vecend = fold_convert (diff_type, gimple_call_arg (call, 6));
+         vecend = fold_build2 (PLUS_EXPR, diff_type, vecend, vecsize);
+         r = fold_build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, diff_type,
+                          range, vecend);
+       }
+      else if (striding)
        	r = range;
      	else
       	{
@@ -747,7 +1115,7 @@ oacc_xform_loop (gcall *call)
          else
           	{
              tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);
-
+             /* chunk_size = (range + per - 1) / per.  */
             	chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
             	chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
             	chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
@@ -875,8 +1243,9 @@ oacc_get_min_dim (int dim)
 }
 
 /* Parse the default dimension parameter.  This is a set of
-   :-separated optional compute dimensions.  Each specified dimension
-   is a positive integer.  When device type support is added, it is
+   :-separated optional compute dimensions.  Each dimension is either
+   a positive integer, or '-' for a dynamic value computed at
+   runtime.  When device type support is added, it is
    planned to be a comma separated list of such compute dimensions,
   	with all but the first prefixed by the colon-terminated device
  	type.  */
@@ -911,14 +1280,20 @@ oacc_parse_default_dims (const char *dims)
 
       if (*pos != ':')
        	{
-         long val;
-         const char *eptr;
+         long val = 0;
 
-         errno = 0;
-         val = strtol (pos, CONST_CAST (char **, &eptr), 10);
-         if (errno || val <= 0 || (int) val != val)
-           goto malformed;
-         pos = eptr;
+         if (*pos == '-')
+           pos++;
+         else
+           {
+             const char *eptr;
+
+             errno = 0;
+             val = strtol (pos, CONST_CAST (char **, &eptr), 10);
+             if (errno || val <= 0 || (int) val != val)
+               goto malformed;
+             pos = eptr;
+           }
          oacc_default_dims[ix] = (int) val;
        	}
     }
@@ -1777,6 +2152,129 @@ default_goacc_fork_join (gcall *ARG_UNUSED (call),
   return targetm.have_oacc_join ();
 }
 
+void
+oacc_build_array_copy (tree dst, tree src, tree max_idx, gimple_seq *seq)
+{
+  push_gimplify_context (true);
+
+  tree len = fold_build2 (PLUS_EXPR, size_type_node, max_idx, size_int (1));
+  tree ptr_to_array = (TREE_TYPE (dst) == ptr_type_node ? src : dst);
+  tree elem_type;
+  if (TREE_CODE (TREE_TYPE (ptr_to_array)) == POINTER_TYPE
+      && TREE_CODE (TREE_TYPE (TREE_TYPE (ptr_to_array))) == ARRAY_TYPE)
+    elem_type = TREE_TYPE (TREE_TYPE (TREE_TYPE (ptr_to_array)));
+  else
+    elem_type = TREE_TYPE (TREE_TYPE (ptr_to_array));
+  tree elem_size = TYPE_SIZE_UNIT (elem_type);
+  tree size = fold_build2 (MULT_EXPR, size_type_node, len, elem_size);
+
+  tree memcpy_decl = builtin_decl_implicit (BUILT_IN_MEMCPY);
+  tree call = build_call_expr (memcpy_decl, 3, dst, src, size);
+  gimplify_and_add (call, seq);
+  pop_gimplify_context (NULL);
+}
+
+void
+oacc_build_array_copy_loop (location_t loc, tree dst, tree src, tree max_idx,
+                            gimple_stmt_iterator *gsi)
+{
+  push_gimplify_context (true);
+
+  tree loop_index;
+  gimple_stmt_iterator loop_body_gsi;
+  oacc_build_indexed_ssa_loop (loc, max_idx, gsi,
+                               &loop_index, &loop_body_gsi);
+  gimple_seq copy_seq = NULL;
+
+  tree dst_array_type = TREE_TYPE (TREE_TYPE (dst));
+  tree dst_elem_type = build_qualified_type (TREE_TYPE (dst_array_type),
+                                             TYPE_QUALS (dst_array_type));
+  tree dst_elem_ptr_type = build_pointer_type (dst_elem_type);
+  tree dst_ptr = fold_convert (dst_elem_ptr_type, dst);
+
+  tree src_array_type = TREE_TYPE (TREE_TYPE (src));
+  tree src_elem_type = build_qualified_type (TREE_TYPE (src_array_type),
+                                             TYPE_QUALS (src_array_type));
+  tree src_elem_ptr_type = build_pointer_type (src_elem_type);
+  tree src_ptr = fold_convert (src_elem_ptr_type, src);
+
+  tree offset = build2 (MULT_EXPR, sizetype,
+                        loop_index, TYPE_SIZE_UNIT (dst_elem_type));
+
+  dst_ptr = build2 (POINTER_PLUS_EXPR, dst_elem_ptr_type, dst_ptr, offset);
+  src_ptr = build2 (POINTER_PLUS_EXPR, src_elem_ptr_type, src_ptr, offset);
+
+  tree dst_mem_ref = build_simple_mem_ref (dst_ptr);
+  tree src_mem_ref = build_simple_mem_ref (src_ptr);
+
+  gimplify_assign (dst_mem_ref, src_mem_ref, &copy_seq);
+
+  gsi_insert_seq_before (&loop_body_gsi, copy_seq, GSI_SAME_STMT);
+  pop_gimplify_context (NULL);
+}
+
+void
+oacc_build_indexed_ssa_loop (location_t loc, tree max_index,
+                             gimple_stmt_iterator *gsi, tree *out_loop_index,
+                             gimple_stmt_iterator *out_loop_body_code_gsi)
+{
+  gimple *g;
+  gimple_seq seq = NULL;
+
+  tree init_index = make_ssa_name (TREE_TYPE (max_index));
+  tree loop_index = make_ssa_name (TREE_TYPE (max_index));
+  tree update_index = make_ssa_name (TREE_TYPE (max_index));
+
+  g = gimple_build_assign (init_index,
+                           build_int_cst (TREE_TYPE (init_index), 0));
+  gimple_seq_add_stmt (&seq, g);
+
+  gimple *init_end = gimple_seq_last (seq);
+  gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+  basic_block init_bb = gsi_bb (*gsi);
+  edge init_edge = split_block (init_bb, init_end);
+  basic_block loop_bb = init_edge->dest;
+  /* Reset the iterator.  */
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  seq = NULL;
+  g = gimple_build_assign (update_index, PLUS_EXPR, loop_index,
+                           build_int_cst (TREE_TYPE (loop_index), 1));
+  gimple_seq_add_stmt (&seq, g);
+
+  g = gimple_build_cond (LE_EXPR, update_index, max_index, NULL, NULL);
+  gimple_seq_add_stmt (&seq, g);
+  gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
+
+  edge post_edge = split_block (loop_bb, g);
+  basic_block post_bb = post_edge->dest;
+  loop_bb = post_edge->src;
+  /* Reset the iterator.  */
+  *gsi = gsi_for_stmt (gsi_stmt (*gsi));
+
+  /* Return place where we insert loop body code.  */
+  gimple_stmt_iterator loop_body_code_gsi = gsi_start_bb (loop_bb);
+
+  post_edge->flags ^= EDGE_FALSE_VALUE | EDGE_FALLTHRU;
+  post_edge->probability = profile_probability::even ();
+  edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
+  loop_edge->probability = profile_probability::even ();
+  set_immediate_dominator (CDI_DOMINATORS, loop_bb, init_bb);
+  set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
+  class loop *new_loop = alloc_loop ();
+  new_loop->header = loop_bb;
+  new_loop->latch = loop_bb;
+  add_loop (new_loop, loop_bb->loop_father);
+
+  gphi *phi = create_phi_node (loop_index, loop_bb);
+  add_phi_arg (phi, init_index, init_edge, loc);
+  add_phi_arg (phi, update_index, loop_edge, loc);
+
+  *out_loop_index = loop_index;
+  *out_loop_body_code_gsi = loop_body_code_gsi;
+}
+
 /* Default goacc.reduction early expander.
 
    LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
@@ -1802,18 +2300,44 @@ default_goacc_reduction (gcall *call)
         if there is one.  */
       tree ref_to_res = gimple_call_arg (call, 1);
 
+      tree array_addr = gimple_call_arg (call, 6);
+      tree array_max_idx = gimple_call_arg (call, 7);
+
       if (!integer_zerop (ref_to_res))
       	{
-         tree dst = build_simple_mem_ref (ref_to_res);
-         tree src = var;
-
-         if (code == IFN_GOACC_REDUCTION_SETUP)
+         if (!integer_zerop (array_addr))
            	{
-             src = dst;
-             dst = lhs;
-             lhs = NULL;
+             tree dst, src;
+             if (code == IFN_GOACC_REDUCTION_SETUP)
+               dst = array_addr, src = ref_to_res;
+             else
+               src = array_addr, dst = ref_to_res;
+             oacc_build_array_copy (dst, src, array_max_idx, &seq);
+           }
+         else
+           {
+             /* Dummy reduction vars that have GOMP_MAP_FIRSTPRIVATE_POINTER
+                data mappings get retyped to (void *).  Adjust the type of
+                ref_to_res as appropriate.  */
+             if (TREE_TYPE (TREE_TYPE (ref_to_res)) != TREE_TYPE (var))
+               {
+                 tree ptype = build_pointer_type (TREE_TYPE (var));
+                 tree t = make_ssa_name (ptype);
+                 tree expr = fold_build1 (NOP_EXPR, ptype, ref_to_res);
+                 gimple_seq_add_stmt (&seq, gimple_build_assign (t, expr));
+                 ref_to_res = t;
+               }
+             tree dst = build_simple_mem_ref (ref_to_res);
+             tree src = var;
+
+             if (code == IFN_GOACC_REDUCTION_SETUP)
+               {
+                 src = dst;
+                 dst = lhs;
+                 lhs = NULL;
+               }
+             gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
           	}
-         gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
       	}
     }
@@ -2145,15 +2669,19 @@ execute_oacc_loop_designation ()
 static unsigned int
 execute_oacc_device_lower ()
 {
-  tree attrs = oacc_get_fn_attrib (current_function_decl);
+  tree attrs;
+  int dims[GOMP_DIM_MAX];
 
-  if (!attrs)
-    /* Not an offloaded function.  */
-    return 0;
+  if (flag_openacc)
+    {
+      attrs = oacc_get_fn_attrib (current_function_decl);
+      if (!attrs)
+        /* Not an offloaded function.  */
+        return 0;
 
-  int dims[GOMP_DIM_MAX];
-  for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
-    dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+      for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
+        dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+    }
 
   hash_map<tree, tree> adjusted_vars;
 
@@ -2222,7 +2750,8 @@ execute_oacc_device_lower ()
 
           	case IFN_UNIQUE_OACC_FORK:
           	case IFN_UNIQUE_OACC_JOIN:
-             if (integer_minus_onep (gimple_call_arg (call, 2)))
+             if (flag_openacc
+                 && integer_minus_onep (gimple_call_arg (call, 2)))
               	remove = true;
             	else if (!targetm.goacc.fork_join
                       (call, dims, kind == IFN_UNIQUE_OACC_FORK))
@@ -2509,7 +3038,8 @@ public:
   {}
 
   /* opt_pass methods: */
-  bool gate (function *) final override { return flag_openacc; };
+  bool gate (function *) final override
+  { return flag_openacc || (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC); };
 
   unsigned int execute (function *) final override
     {
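
Notes on the new OMPACC tagging pass:

omp_ompacc_attribute_tagging walks every defined function, scans the body of
each "omp target" region, and attaches the internal _OMPACC_ clause (plus the
"ompacc"/"ompacc seq" attributes on reachable callees) only when the region
is built entirely from the supported pieces: for/parallel/teams/distribute
with at most 'collapse' and 'nowait' clauses, atomics, and calls to functions
whose bodies are visible. A hypothetical test case, not part of the patch,
illustrating which regions qualify:

    /* Sketch only: assumes combined directives lower to the nested
       OMP_TEAMS/OMP_DISTRIBUTE/OMP_PARALLEL/OMP_FOR trees the scanner
       matches.  */
    extern int ext_fn (int);            /* Body not visible.  */

    void
    tagged (int *a, int n)
    {
      /* Only 'map' on the target, only supported inner constructs:
         the region gets the internal _OMPACC_ clause.  */
    #pragma omp target map(tofrom: a[0:n])
    #pragma omp teams distribute parallel for
      for (int i = 0; i < n; i++)
        a[i] += 1;
    }

    void
    not_tagged (int *a, int n)
    {
      /* 'simd' is in the unsupported-construct list and ext_fn has no
         visible body; either makes scan_omp_target_region_r set
         ompacc_invalid, so the region is diagnosed and left untagged.  */
    #pragma omp target map(tofrom: a[0:n])
    #pragma omp teams distribute parallel for simd
      for (int i = 0; i < n; i++)
        a[i] = ext_fn (a[i]);
    }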
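The vf_by_vectorizer changes in oacc_xform_loop give each thread a contiguous
block of iterations instead of the SIMT warp-strided slice, sized so the tree
vectorizer can handle the innermost loop. A scalar model of the values the
IFN_GOACC_LOOP_OFFSET/_BOUND/_STEP expansions compute (illustrative only,
shown for dir == 1 and no chunking, which the code asserts):

    /* gangs, workers, vf: launch dimensions; gang, worker: current ids.  */
    long volume = (long) gangs * workers * vf;            /* thread count */
    long per = volume * step;
    long chunk_size = (range + per - 1) / per;            /* ceil (range/per) */
    long warppos = ((long) gang * workers + worker) * vf; /* vector pos == 0 */
    long offset = warppos * chunk_size;                   /* LOOP_OFFSET */
    long bound = offset + chunk_size * vf * step;         /* LOOP_BOUND  */
    if (bound > range)
      bound = range;                          /* MIN_EXPR (range, vecend) */
    for (long i = offset; i < bound; i += step)  /* LOOP_STEP is just step */
      body (i);

Because each thread now owns a contiguous, unit-strided block, marking the
inner loop with force_vectorize/safelen lets the auto-vectorizer provide the
vector-level parallelism that SIMT targets get from the hardware.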
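oacc_build_array_copy emits a single memcpy of (max_idx + 1) * sizeof (elem)
bytes, while oacc_build_array_copy_loop copies element-wise inside a loop
created by oacc_build_indexed_ssa_loop. The latter builds a bottom-test loop
directly in SSA form: an init block, a self-latching body block with a PHI
for the index, and an exit block. As C, the generated shape is roughly:

    /* Illustrative equivalent of the generated CFG: the condition sits at
       the bottom, so the body runs at least once and indices
       0 .. max_index inclusive are visited.  */
    long i = 0;                        /* init_bb: init_index */
    do
      {
        copy_one_element (dst, src, i); /* loop_bb: body insertion point */
        i = i + 1;                      /* update_index */
      }
    while (i <= max_index);             /* gimple_build_cond (LE_EXPR, ...) */

These helpers back the new array path in default_goacc_reduction, which
copies a whole reduction array between ref_to_res and the array address at
setup/teardown instead of assigning a single scalar.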
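Finally, the oacc_parse_default_dims change extends the dimension syntax so
that each entry may be '-' as well as a positive integer; '-' records 0,
leaving that dimension to be chosen dynamically rather than fixed up front.
For example (assumed option spelling, matching the parser above),
-fopenacc-dim=32:4:- would fix 32 gangs and 4 workers while deferring the
vector length.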