aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-offload.cc
diff options
context:
space:
mode:
author: Chung-Lin Tang <cltang@codesourcery.com> 2023-05-19 12:14:04 -0700
committer: Chung-Lin Tang <cltang@codesourcery.com> 2023-05-19 12:14:04 -0700
commit: 5f881613fa9128edae5bbfa4e19f9752809e4bd7 (patch)
tree: 9677f855effa09243c00f6530cb3b5b03b70ffdd /gcc/omp-offload.cc
parent: 17c41b39078fc8ad67fd1b82f74ef5174f34452e (diff)
download: gcc-devel/omp/gcc-12.zip
gcc-devel/omp/gcc-12.tar.gz
gcc-devel/omp/gcc-12.tar.bz2
Use OpenACC code to process OpenMP target regions (devel/omp/gcc-12)
This is a backport of: https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619003.html This patch implements '-fopenmp-target=acc', which enables internally handling a subset of OpenMP target regions as OpenACC parallel regions. This basically includes target, teams, parallel, distribute, for/do constructs, and atomics. Essentially, we adjust the internal kinds to OpenACC type, and let OpenACC code paths handle them, with various needed adjustments throughout middle-end and nvptx backend. When using this "OMPACC" mode, if there are cases the patch doesn't handle, it issues a warning, and reverts to normal processing for that target region. gcc/ChangeLog: * builtins.cc (expand_builtin_omp_builtins): New function. (expand_builtin): Add expand cases for BUILT_IN_GOMP_BARRIER, BUILT_IN_OMP_GET_THREAD_NUM, BUILT_IN_OMP_GET_NUM_THREADS, BUILT_IN_OMP_GET_TEAM_NUM, and BUILT_IN_OMP_GET_NUM_TEAMS using expand_builtin_omp_builtins, enabled under -fopenmp-target=acc. * cgraphunit.cc (analyze_functions): Add call to omp_ompacc_attribute_tagging, enabled under -fopenmp-target=acc. * common.opt (fopenmp-target=): Add new option and enums. * config/nvptx/mkoffload.cc (main): Handle -fopenmp-target=. * config/nvptx/nvptx-protos.h (nvptx_expand_omp_get_num_threads): New prototype. (nvptx_mem_shared_p): Likewise. * config/nvptx/nvptx.cc (omp_num_threads_sym): New global static RTX symbol for number of threads in team. (omp_num_threads_align): New var for alignment of omp_num_threads_sym. (need_omp_num_threads): New bool for if any function references omp_num_threads_sym. (nvptx_option_override): Initialize omp_num_threads_sym/align. (write_as_kernel): Disable normal OpenMP kernel entry under OMPACC mode. (nvptx_declare_function_name): Disable shim function under OMPACC mode. Disable soft-stack under OMPACC mode. Add generation of neutering init code under OMPACC mode. (nvptx_output_set_softstack): Return "" under OMPACC mode. 
(nvptx_expand_call): Set parallelism to vector for function calls with "ompacc for" attached. (nvptx_expand_oacc_fork): Set mode to GOMP_DIM_VECTOR under OMPACC mode. (nvptx_expand_oacc_join): Likewise. (nvptx_expand_omp_get_num_threads): New function. (nvptx_mem_shared_p): New function. (nvptx_mach_max_workers): Return 1 under OMPACC mode. (nvptx_mach_vector_length): Return 32 under OMPACC mode. (nvptx_single): Add adjustments for OMPACC mode, which have parallel-construct fork/joins, and regions of code where neutering is dynamically determined. (nvptx_reorg): Enable neutering under OMPACC mode when "ompacc for" attribute is attached to function. Disable uniform-simt when under OMPACC mode. (nvptx_file_end): Write __nvptx_omp_num_threads out when needed. (nvptx_goacc_fork_join): Return true under OMPACC mode. * config/nvptx/nvptx.h (struct GTY(()) machine_function): Add omp_parallel_predicate and omp_fn_entry_num_threads_reg fields. * config/nvptx/nvptx.md (unspecv): Add UNSPECV_GET_TID, UNSPECV_GET_NTID, UNSPECV_GET_CTAID, UNSPECV_GET_NCTAID, UNSPECV_OMP_PARALLEL_FORK, UNSPECV_OMP_PARALLEL_JOIN entries. (nvptx_shared_mem_operand): New predicate. (gomp_barrier): New expand pattern. (omp_get_num_threads): New expand pattern. (omp_get_num_teams): New insn pattern. (omp_get_thread_num): Likewise. (omp_get_team_num): Likewise. (get_ntid): Likewise. (nvptx_omp_parallel_fork): Likewise. (nvptx_omp_parallel_join): Likewise. * flag-types.h (omp_target_mode_kind): New flag value enum. * gimplify.cc (struct gimplify_omp_ctx): Add 'bool ompacc' field. (gimplify_scan_omp_clauses): Handle OMP_CLAUSE__OMPACC_. (gimplify_adjust_omp_clauses): Likewise. (gimplify_omp_ctx_ompacc_p): New function. (gimplify_omp_for): Handle combined loops under OMPACC. * lto-wrapper.cc (append_compiler_options): Add OPT_fopenmp_target_. * omp-builtins.def (BUILT_IN_OMP_GET_THREAD_NUM): Remove CONST. (BUILT_IN_OMP_GET_NUM_THREADS): Likewise. 
* omp-expand.cc (remove_exit_barrier): Disable addressable-var processing for parallel construct child functions under OMPACC mode. (expand_oacc_for): Add OMPACC mode handling. (get_target_arguments): Force thread_limit clause value to 1 under OMPACC mode. (expand_omp): Under OMPACC mode, avoid child function expanding of GIMPLE_OMP_PARALLEL. * omp-general.cc (omp_extract_for_data): Adjustments for OMPACC mode. * omp-low.cc (struct omp_context): Add 'bool ompacc_p' field. (scan_sharing_clauses): Handle OMP_CLAUSE__OMPACC_. (ompacc_ctx_p): New function. (scan_omp_parallel): Handle OMPACC mode, avoid creating child function. (scan_omp_target): Tag "ompacc"/"ompacc for" attributes for target construct child function, remove OMP_CLAUSE__OMPACC_ clauses. (lower_oacc_head_mark): Handle OMPACC mode cases. (lower_omp_for): Adjust OMP_FOR kind from OpenMP to OpenACC kinds, add vector/gang clauses as needed. Add other OMPACC handling. (lower_omp_taskreg): Add call to lower_oacc_head_tail for OMPACC case. (lower_omp_target): Do OpenACC gang privatization under OMPACC case. (lower_omp_teams): Forward OpenACC privatization variables to outer target region under OMPACC mode. (lower_omp_1): Do OpenACC gang privatization under OMPACC case for GIMPLE_BIND. * omp-offload.cc (ompacc_supported_clauses_p): New function. (struct target_region_data): New struct type for tree walk. (scan_fndecl_for_ompacc): New function. (scan_omp_target_region_r): New function. (scan_omp_target_construct_r): New function. (omp_ompacc_attribute_tagging): New function. (oacc_dim_call): Add OMPACC case handling. (execute_oacc_device_lower): Make parts explicitly only OpenACC enabled. (pass_oacc_device_lower::gate): Enable pass under OMPACC mode. * omp-offload.h (omp_ompacc_attribute_tagging): New prototype. * opts.cc (finish_options): Only allow -fopenmp-target= when -fopenmp and no -fopenacc. * target-insns.def (gomp_barrier): New defined insn pattern. (omp_get_thread_num): Likewise. 
(omp_get_num_threads): Likewise. (omp_get_team_num): Likewise. (omp_get_num_teams): Likewise. * tree-core.h (enum omp_clause_code): Add new OMP_CLAUSE__OMPACC_ entry for internal clause. * tree-nested.cc (convert_nonlocal_omp_clauses): Handle OMP_CLAUSE__OMPACC_. * tree-pretty-print.cc (dump_omp_clause): Handle OMP_CLAUSE__OMPACC_. * tree.cc (omp_clause_num_ops): Add OMP_CLAUSE__OMPACC_ entry. (omp_clause_code_name): Likewise. * tree.h (OMP_CLAUSE__OMPACC__FOR): New macro for OMP_CLAUSE__OMPACC_. * tree-ssa-loop.cc (pass_oacc_only::gate): Enable pass under OMPACC mode cases. libgomp/ChangeLog: * config/nvptx/team.c (__nvptx_omp_num_threads): New global variable in shared memory.
Diffstat (limited to 'gcc/omp-offload.cc')
-rw-r--r--gcc/omp-offload.cc303
1 files changed, 294 insertions, 9 deletions
diff --git a/gcc/omp-offload.cc b/gcc/omp-offload.cc
index b18f28f..9dae07c 100644
--- a/gcc/omp-offload.cc
+++ b/gcc/omp-offload.cc
@@ -388,6 +388,269 @@ omp_discover_implicit_declare_target (void)
lang_hooks.decls.omp_finish_decl_inits ();
}
+/* Return true if every clause in the chain CLAUSES is one of the clause
+   kinds accepted here (collapse and nowait).  An empty chain is accepted.  */
+
+static bool
+ompacc_supported_clauses_p (tree clauses)
+{
+  for (tree cl = clauses; cl != NULL_TREE; cl = OMP_CLAUSE_CHAIN (cl))
+    {
+      enum omp_clause_code code = OMP_CLAUSE_CODE (cl);
+      if (code != OMP_CLAUSE_COLLAPSE && code != OMP_CLAUSE_NOWAIT)
+        return false;
+    }
+  return true;
+}
+
+/* State accumulated while walking a target region, or a function it
+   references, to decide whether it can be processed in OMPACC mode.  */
+
+struct target_region_data
+{
+  /* Function whose body is being walked; the walker does not descend into
+     references to this decl itself.  */
+  tree func_decl;
+
+  /* Set when an OMP_FOR with supported clauses and rectangular bounds was
+     seen.  */
+  bool has_omp_for;
+
+  /* Set when an OMP_PARALLEL with supported clauses was seen.  */
+  bool has_omp_parallel;
+
+  /* Set as soon as anything unsupported is found.  */
+  bool ompacc_invalid;
+
+  /* Deferred diagnostics.  Entries are pushed in pairs: message I is meant
+     to be reported at location I.  */
+  auto_vec<const char *> warning_msgs;
+  auto_vec<location_t> warning_locs;
+
+  target_region_data ()
+    : func_decl (NULL_TREE),
+      has_omp_for (false),
+      has_omp_parallel (false),
+      ompacc_invalid (false)
+  {
+  }
+};
+
+static tree scan_omp_target_region_r (tree *, int *, void *);
+
+/* Walk the saved body of function DECL looking for anything OMPACC mode
+   cannot handle.  A DECL carrying an "omp declare variant base" or
+   "omp declare variant variant" attribute is rejected as well.
+
+   If TGTDATA is non-NULL, the flags and queued warnings collected for DECL
+   are merged into it, so that the region referencing DECL inherits them.
+
+   If nothing unsupported was found and DECL is not yet tagged, DECL gets the
+   "ompacc" attribute, plus "ompacc seq" when no parallel construct was seen.
+
+   NOTE(review): the walker only skips references to DECL itself; nothing
+   visible here bounds the recursion for mutually recursive functions
+   (A references B, B references A) -- confirm.  */
+
+static void
+scan_fndecl_for_ompacc (tree decl, target_region_data *tgtdata)
+{
+  target_region_data td;
+  td.func_decl = decl;
+  walk_tree_without_duplicates (&DECL_SAVED_TREE (decl),
+                                scan_omp_target_region_r, &td);
+
+  if (lookup_attribute ("omp declare variant base", DECL_ATTRIBUTES (decl))
+      || lookup_attribute ("omp declare variant variant",
+                           DECL_ATTRIBUTES (decl)))
+    {
+      td.ompacc_invalid = true;
+      td.warning_msgs.safe_push ("declare variant not supported for OMPACC");
+      /* The attribute is a TREE_LIST, which carries no location of its own
+         (EXPR_LOCATION on it gives UNKNOWN_LOCATION), so report at the
+         function's declaration instead.  */
+      td.warning_locs.safe_push (DECL_SOURCE_LOCATION (decl));
+    }
+
+  if (tgtdata)
+    {
+      tgtdata->has_omp_for |= td.has_omp_for;
+      tgtdata->has_omp_parallel |= td.has_omp_parallel;
+      tgtdata->ompacc_invalid |= td.ompacc_invalid;
+      for (unsigned i = 0; i < td.warning_msgs.length (); i++)
+        tgtdata->warning_msgs.safe_push (td.warning_msgs[i]);
+      for (unsigned i = 0; i < td.warning_locs.length (); i++)
+        tgtdata->warning_locs.safe_push (td.warning_locs[i]);
+    }
+
+  if (!td.ompacc_invalid
+      && !lookup_attribute ("ompacc", DECL_ATTRIBUTES (decl)))
+    {
+      DECL_ATTRIBUTES (decl)
+        = tree_cons (get_identifier ("ompacc"), NULL_TREE,
+                     DECL_ATTRIBUTES (decl));
+      /* NOTE(review): the ChangeLog accompanying this change speaks of an
+         "ompacc for" attribute; confirm "ompacc seq" is the name that the
+         consumers of this attribute look up.  */
+      if (!td.has_omp_parallel)
+        DECL_ATTRIBUTES (decl)
+          = tree_cons (get_identifier ("ompacc seq"), NULL_TREE,
+                       DECL_ATTRIBUTES (decl));
+    }
+}
+
+/* Record in TGTDATA that the region cannot be handled in OMPACC mode, and
+   queue MSG to be reported at LOC.  Keeps the message and location vectors
+   in step with each other.  */
+
+static void
+ompacc_region_invalid (target_region_data *tgtdata, const char *msg,
+                       location_t loc)
+{
+  tgtdata->ompacc_invalid = true;
+  tgtdata->warning_msgs.safe_push (msg);
+  tgtdata->warning_locs.safe_push (loc);
+}
+
+/* Return true if FNDECL is one of the thread/team query functions
+   (omp_get_thread_num, omp_get_num_threads, omp_get_team_num,
+   omp_get_num_teams), recognized either as a built-in or by name.  */
+
+static bool
+ompacc_query_fn_p (tree fndecl)
+{
+  return (fndecl_built_in_p (fndecl, BUILT_IN_OMP_GET_THREAD_NUM)
+          || fndecl_built_in_p (fndecl, BUILT_IN_OMP_GET_NUM_THREADS)
+          || fndecl_built_in_p (fndecl, BUILT_IN_OMP_GET_TEAM_NUM)
+          || fndecl_built_in_p (fndecl, BUILT_IN_OMP_GET_NUM_TEAMS)
+          || id_equal (DECL_NAME (fndecl), "omp_get_thread_num")
+          || id_equal (DECL_NAME (fndecl), "omp_get_num_threads")
+          || id_equal (DECL_NAME (fndecl), "omp_get_team_num")
+          || id_equal (DECL_NAME (fndecl), "omp_get_num_teams"));
+}
+
+/* walk_tree callback used on the body of a target region or of a function
+   referenced from one.  DATA points to the target_region_data being filled
+   in.  Referenced functions with a visible body are scanned recursively via
+   scan_fndecl_for_ompacc; functions without one, and unsupported constructs
+   or clauses, mark the region invalid and queue a warning.  Always returns
+   NULL_TREE, so the walk is never cut short.  */
+
+static tree
+scan_omp_target_region_r (tree *tp, int *walk_subtrees, void *data)
+{
+  target_region_data *tgtdata = (target_region_data *) data;
+  tree t = *tp;
+
+  if (TREE_CODE (t) == FUNCTION_DECL
+      && !ompacc_query_fn_p (t)
+      && t != tgtdata->func_decl)
+    {
+      /* Look through aliases to the decl that may own the body.  */
+      tree decl = t;
+      symtab_node *node = symtab_node::get (t);
+      if (node)
+        decl = node->ultimate_alias_target ()->decl;
+
+      if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
+        scan_fndecl_for_ompacc (decl, tgtdata);
+      else
+        /* A FUNCTION_DECL is not an expression, so EXPR_LOCATION would give
+           UNKNOWN_LOCATION; use the declaration's own location.  */
+        ompacc_region_invalid (tgtdata, "referencing external function",
+                               DECL_SOURCE_LOCATION (t));
+      *walk_subtrees = 0;
+      return NULL_TREE;
+    }
+
+  switch (TREE_CODE (t))
+    {
+    case OMP_FOR:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (t)))
+        ompacc_region_invalid (tgtdata, "clauses not supported",
+                               EXPR_LOCATION (t));
+      else if (OMP_FOR_NON_RECTANGULAR (t))
+        ompacc_region_invalid (tgtdata, "non-rectangular loops not supported",
+                               EXPR_LOCATION (t));
+      else
+        tgtdata->has_omp_for = true;
+      break;
+
+    case OMP_PARALLEL:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (t)))
+        ompacc_region_invalid (tgtdata, "clauses not supported",
+                               EXPR_LOCATION (t));
+      else
+        tgtdata->has_omp_parallel = true;
+      break;
+
+    case OMP_DISTRIBUTE:
+    case OMP_TEAMS:
+      if (!ompacc_supported_clauses_p (OMP_CLAUSES (t)))
+        ompacc_region_invalid (tgtdata, "clauses not supported",
+                               EXPR_LOCATION (t));
+      break;
+
+    case OMP_ATOMIC:
+    case OMP_ATOMIC_READ:
+    case OMP_ATOMIC_CAPTURE_OLD:
+    case OMP_ATOMIC_CAPTURE_NEW:
+      /* Accepted as-is.  */
+      break;
+
+    case OMP_SIMD:
+    case OMP_TASK:
+    case OMP_LOOP:
+    case OMP_TASKLOOP:
+    case OMP_TASKGROUP:
+    case OMP_SECTION:
+    case OMP_MASTER:
+    case OMP_MASKED:
+    case OMP_ORDERED:
+    case OMP_CRITICAL:
+    case OMP_SCAN:
+    case OMP_METADIRECTIVE:
+      ompacc_region_invalid (tgtdata, "construct not supported",
+                             EXPR_LOCATION (t));
+      /* The region is already rejected; do not look inside.  */
+      *walk_subtrees = 0;
+      break;
+
+    case OMP_TARGET:
+      ompacc_region_invalid (tgtdata, "nested target/reverse offload "
+                             "not supported", EXPR_LOCATION (t));
+      *walk_subtrees = 0;
+      break;
+
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
+/* walk_tree callback run over a function body; DATA is the FUNCTION_DECL
+   being walked.  For each OMP_TARGET found, scan its body and clauses:
+   if everything is supported, prepend an internal OMP_CLAUSE__OMPACC_ clause
+   to the target's clause list; otherwise warn that the region is not
+   suitable and emit the queued reasons.  Nested contents of a target are not
+   walked again by this callback.  Always returns NULL_TREE.  */
+
+static tree
+scan_omp_target_construct_r (tree *tp, int *walk_subtrees,
+                             void *data)
+{
+  if (TREE_CODE (*tp) != OMP_TARGET)
+    return NULL_TREE;
+
+  target_region_data td;
+  td.func_decl = (tree) data;
+  walk_tree_without_duplicates (&OMP_TARGET_BODY (*tp),
+                                scan_omp_target_region_r, &td);
+
+  /* Only map clauses are accepted on the target construct itself; report
+     the first clause of any other kind and stop looking.  */
+  for (tree c = OMP_TARGET_CLAUSES (*tp); c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
+      {
+        td.ompacc_invalid = true;
+        td.warning_msgs.safe_push ("clause not supported");
+        /* Clauses are not expressions, so EXPR_LOCATION would give
+           UNKNOWN_LOCATION; OMP_CLAUSE_LOCATION holds the clause's
+           location.  */
+        td.warning_locs.safe_push (OMP_CLAUSE_LOCATION (c));
+        break;
+      }
+
+  if (!td.ompacc_invalid)
+    {
+      tree c = build_omp_clause (EXPR_LOCATION (*tp), OMP_CLAUSE__OMPACC_);
+      /* NOTE(review): the ChangeLog accompanying this change names the
+         accessor OMP_CLAUSE__OMPACC__FOR; confirm that
+         OMP_CLAUSE__OMPACC__SEQ is what tree.h provides.  */
+      if (!td.has_omp_parallel)
+        OMP_CLAUSE__OMPACC__SEQ (c) = 1;
+      OMP_CLAUSE_CHAIN (c) = OMP_TARGET_CLAUSES (*tp);
+      OMP_TARGET_CLAUSES (*tp) = c;
+    }
+  else
+    {
+      warning_at (EXPR_LOCATION (*tp), 0, "Target region not suitable for "
+                  "OMPACC mode");
+      /* The queued messages are plain strings, not format strings; pass
+         them through "%s" rather than as the format itself.  */
+      for (unsigned i = 0; i < td.warning_locs.length (); i++)
+        warning_at (td.warning_locs[i], 0, "%s", td.warning_msgs[i]);
+    }
+
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
+/* Entry point for OMPACC tagging.  For every defined function with a saved
+   body: if it is flagged as containing a target construct, scan its target
+   regions with scan_omp_target_construct_r; then scan each of its nested
+   functions that is an "omp declare target" function with
+   scan_fndecl_for_ompacc.  A nested function that ends up tagged "ompacc"
+   and is not already "noinline" additionally gets the "noinline" and
+   "noipa" attributes.  */
+
+void
+omp_ompacc_attribute_tagging (void)
+{
+  cgraph_node *node;
+  FOR_EACH_DEFINED_FUNCTION (node)
+    {
+      tree fndecl = node->decl;
+      if (!DECL_SAVED_TREE (fndecl))
+        continue;
+
+      struct function *fn = DECL_STRUCT_FUNCTION (fndecl);
+      if (fn && fn->has_omp_target)
+        walk_tree_without_duplicates (&DECL_SAVED_TREE (fndecl),
+                                      scan_omp_target_construct_r,
+                                      fndecl);
+
+      for (cgraph_node *nested = first_nested_function (node);
+           nested; nested = next_nested_function (nested))
+        {
+          tree nested_decl = nested->decl;
+          if (!omp_declare_target_fn_p (nested_decl))
+            continue;
+
+          scan_fndecl_for_ompacc (nested_decl, NULL);
+
+          /* Nothing more to do unless the scan tagged the function and it
+             is not already marked noinline.  */
+          if (!lookup_attribute ("ompacc", DECL_ATTRIBUTES (nested_decl))
+              || lookup_attribute ("noinline", DECL_ATTRIBUTES (nested_decl)))
+            continue;
+
+          DECL_ATTRIBUTES (nested_decl)
+            = tree_cons (get_identifier ("noinline"),
+                         NULL, DECL_ATTRIBUTES (nested_decl));
+          DECL_ATTRIBUTES (nested_decl)
+            = tree_cons (get_identifier ("noipa"),
+                         NULL, DECL_ATTRIBUTES (nested_decl));
+        }
+    }
+}
/* Create new symbols containing (address, size) pairs for global variables,
marked with "omp declare target" attribute, as well as addresses for the
@@ -480,6 +743,22 @@ omp_finish_file (void)
static tree
oacc_dim_call (bool pos, int dim, gimple_seq *seq)
{
+ if (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC)
+ {
+ enum built_in_function fn;
+ if (dim == GOMP_DIM_VECTOR)
+ fn = pos ? BUILT_IN_OMP_GET_THREAD_NUM : BUILT_IN_OMP_GET_NUM_THREADS;
+ else if (dim == GOMP_DIM_GANG)
+ fn = pos ? BUILT_IN_OMP_GET_TEAM_NUM : BUILT_IN_OMP_GET_NUM_TEAMS;
+ else
+ gcc_unreachable ();
+ tree size = create_tmp_var (integer_type_node);
+ gimple *call = gimple_build_call (builtin_decl_explicit (fn), 0);
+ gimple_call_set_lhs (call, size);
+ gimple_seq_add_stmt (seq, call);
+ return size;
+ }
+
tree arg = build_int_cst (unsigned_type_node, dim);
tree size = create_tmp_var (integer_type_node);
enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
@@ -2776,15 +3055,19 @@ execute_oacc_loop_designation ()
static unsigned int
execute_oacc_device_lower ()
{
- tree attrs = oacc_get_fn_attrib (current_function_decl);
+ tree attrs;
+ int dims[GOMP_DIM_MAX];
- if (!attrs)
- /* Not an offloaded function. */
- return 0;
+ if (flag_openacc)
+ {
+ attrs = oacc_get_fn_attrib (current_function_decl);
+ if (!attrs)
+ /* Not an offloaded function. */
+ return 0;
- int dims[GOMP_DIM_MAX];
- for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
- dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+ for (unsigned i = 0; i < GOMP_DIM_MAX; i++)
+ dims[i] = oacc_get_fn_dim_size (current_function_decl, i);
+ }
hash_map<tree, tree> adjusted_vars;
@@ -2853,7 +3136,8 @@ execute_oacc_device_lower ()
case IFN_UNIQUE_OACC_FORK:
case IFN_UNIQUE_OACC_JOIN:
- if (integer_minus_onep (gimple_call_arg (call, 2)))
+ if (flag_openacc
+ && integer_minus_onep (gimple_call_arg (call, 2)))
remove = true;
else if (!targetm.goacc.fork_join
(call, dims, kind == IFN_UNIQUE_OACC_FORK))
@@ -3150,7 +3434,8 @@ public:
/* TODO If this were gated on something like '!(fun->curr_properties &
PROP_gimple_oaccdevlow)', then we could easily have several instances
in the pass pipeline? */
- virtual bool gate (function *) { return flag_openacc; };
+ virtual bool gate (function *)
+ { return flag_openacc || (flag_openmp && flag_openmp_target == OMP_TARGET_MODE_OMPACC); };
virtual unsigned int execute (function *)
{