aboutsummaryrefslogtreecommitdiff
path: root/gcc/omp-low.cc
diff options
context:
space:
mode:
author Chung-Lin Tang <cltang@codesourcery.com> 2023-05-19 12:14:04 -0700
committer Chung-Lin Tang <cltang@codesourcery.com> 2023-05-19 12:14:04 -0700
commit 5f881613fa9128edae5bbfa4e19f9752809e4bd7 (patch)
tree 9677f855effa09243c00f6530cb3b5b03b70ffdd /gcc/omp-low.cc
parent 17c41b39078fc8ad67fd1b82f74ef5174f34452e (diff)
download gcc-devel/omp/gcc-12.zip
gcc-devel/omp/gcc-12.tar.gz
gcc-devel/omp/gcc-12.tar.bz2
Use OpenACC code to process OpenMP target regions [devel/omp/gcc-12]
This is a backport of: https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619003.html This patch implements '-fopenmp-target=acc', which enables internally handling a subset of OpenMP target regions as OpenACC parallel regions. This basically includes target, teams, parallel, distribute, for/do constructs, and atomics. Essentially, we adjust the internal kinds to OpenACC type, and let OpenACC code paths handle them, with various needed adjustments throughout middle-end and nvptx backend. When using this "OMPACC" mode, if there are cases the patch doesn't handle, it issues a warning, and reverts to normal processing for that target region. gcc/ChangeLog: * builtins.cc (expand_builtin_omp_builtins): New function. (expand_builtin): Add expand cases for BUILT_IN_GOMP_BARRIER, BUILT_IN_OMP_GET_THREAD_NUM, BUILT_IN_OMP_GET_NUM_THREADS, BUILT_IN_OMP_GET_TEAM_NUM, and BUILT_IN_OMP_GET_NUM_TEAMS using expand_builtin_omp_builtins, enabled under -fopenmp-target=acc. * cgraphunit.cc (analyze_functions): Add call to omp_ompacc_attribute_tagging, enabled under -fopenmp-target=acc. * common.opt (fopenmp-target=): Add new option and enums. * config/nvptx/mkoffload.cc (main): Handle -fopenmp-target=. * config/nvptx/nvptx-protos.h (nvptx_expand_omp_get_num_threads): New prototype. (nvptx_mem_shared_p): Likewise. * config/nvptx/nvptx.cc (omp_num_threads_sym): New global static RTX symbol for number of threads in team. (omp_num_threads_align): New var for alignment of omp_num_threads_sym. (need_omp_num_threads): New bool for if any function references omp_num_threads_sym. (nvptx_option_override): Initialize omp_num_threads_sym/align. (write_as_kernel): Disable normal OpenMP kernel entry under OMPACC mode. (nvptx_declare_function_name): Disable shim function under OMPACC mode. Disable soft-stack under OMPACC mode. Add generation of neutering init code under OMPACC mode. (nvptx_output_set_softstack): Return "" under OMPACC mode. 
(nvptx_expand_call): Set parallelism to vector for function calls with "ompacc for" attached. (nvptx_expand_oacc_fork): Set mode to GOMP_DIM_VECTOR under OMPACC mode. (nvptx_expand_oacc_join): Likewise. (nvptx_expand_omp_get_num_threads): New function. (nvptx_mem_shared_p): New function. (nvptx_mach_max_workers): Return 1 under OMPACC mode. (nvptx_mach_vector_length): Return 32 under OMPACC mode. (nvptx_single): Add adjustments for OMPACC mode, which have parallel-construct fork/joins, and regions of code where neutering is dynamically determined. (nvptx_reorg): Enable neutering under OMPACC mode when "ompacc for" attribute is attached to function. Disable uniform-simt when under OMPACC mode. (nvptx_file_end): Write __nvptx_omp_num_threads out when needed. (nvptx_goacc_fork_join): Return true under OMPACC mode. * config/nvptx/nvptx.h (struct GTY(()) machine_function): Add omp_parallel_predicate and omp_fn_entry_num_threads_reg fields. * config/nvptx/nvptx.md (unspecv): Add UNSPECV_GET_TID, UNSPECV_GET_NTID, UNSPECV_GET_CTAID, UNSPECV_GET_NCTAID, UNSPECV_OMP_PARALLEL_FORK, UNSPECV_OMP_PARALLEL_JOIN entries. (nvptx_shared_mem_operand): New predicate. (gomp_barrier): New expand pattern. (omp_get_num_threads): New expand pattern. (omp_get_num_teams): New insn pattern. (omp_get_thread_num): Likewise. (omp_get_team_num): Likewise. (get_ntid): Likewise. (nvptx_omp_parallel_fork): Likewise. (nvptx_omp_parallel_join): Likewise. * flag-types.h (omp_target_mode_kind): New flag value enum. * gimplify.cc (struct gimplify_omp_ctx): Add 'bool ompacc' field. (gimplify_scan_omp_clauses): Handle OMP_CLAUSE__OMPACC_. (gimplify_adjust_omp_clauses): Likewise. (gimplify_omp_ctx_ompacc_p): New function. (gimplify_omp_for): Handle combined loops under OMPACC. * lto-wrapper.cc (append_compiler_options): Add OPT_fopenmp_target_. * omp-builtins.def (BUILT_IN_OMP_GET_THREAD_NUM): Remove CONST. (BUILT_IN_OMP_GET_NUM_THREADS): Likewise. 
* omp-expand.cc (remove_exit_barrier): Disable addressable-var processing for parallel construct child functions under OMPACC mode. (expand_oacc_for): Add OMPACC mode handling. (get_target_arguments): Force thread_limit clause value to 1 under OMPACC mode. (expand_omp): Under OMPACC mode, avoid child function expanding of GIMPLE_OMP_PARALLEL. * omp-general.cc (omp_extract_for_data): Adjustments for OMPACC mode. * omp-low.cc (struct omp_context): Add 'bool ompacc_p' field. (scan_sharing_clauses): Handle OMP_CLAUSE__OMPACC_. (ompacc_ctx_p): New function. (scan_omp_parallel): Handle OMPACC mode, avoid creating child function. (scan_omp_target): Tag "ompacc"/"ompacc for" attributes for target construct child function, remove OMP_CLAUSE__OMPACC_ clauses. (lower_oacc_head_mark): Handle OMPACC mode cases. (lower_omp_for): Adjust OMP_FOR kind from OpenMP to OpenACC kinds, add vector/gang clauses as needed. Add other OMPACC handling. (lower_omp_taskreg): Add call to lower_oacc_head_tail for OMPACC case. (lower_omp_target): Do OpenACC gang privatization under OMPACC case. (lower_omp_teams): Forward OpenACC privatization variables to outer target region under OMPACC mode. (lower_omp_1): Do OpenACC gang privatization under OMPACC case for GIMPLE_BIND. * omp-offload.cc (ompacc_supported_clauses_p): New function. (struct target_region_data): New struct type for tree walk. (scan_fndecl_for_ompacc): New function. (scan_omp_target_region_r): New function. (scan_omp_target_construct_r): New function. (omp_ompacc_attribute_tagging): New function. (oacc_dim_call): Add OMPACC case handling. (execute_oacc_device_lower): Make parts explicitly only OpenACC enabled. (pass_oacc_device_lower::gate): Enable pass under OMPACC mode. * omp-offload.h (omp_ompacc_attribute_tagging): New prototype. * opts.cc (finish_options): Only allow -fopenmp-target= when -fopenmp and no -fopenacc. * target-insns.def (gomp_barrier): New defined insn pattern. (omp_get_thread_num): Likewise. 
(omp_get_num_threads): Likewise. (omp_get_team_num): Likewise. (omp_get_num_teams): Likewise. * tree-core.h (enum omp_clause_code): Add new OMP_CLAUSE__OMPACC_ entry for internal clause. * tree-nested.cc (convert_nonlocal_omp_clauses): Handle OMP_CLAUSE__OMPACC_. * tree-pretty-print.cc (dump_omp_clause): Handle OMP_CLAUSE__OMPACC_. * tree.cc (omp_clause_num_ops): Add OMP_CLAUSE__OMPACC_ entry. (omp_clause_code_name): Likewise. * tree.h (OMP_CLAUSE__OMPACC__FOR): New macro for OMP_CLAUSE__OMPACC_. * tree-ssa-loop.cc (pass_oacc_only::gate): Enable pass under OMPACC mode cases. libgomp/ChangeLog: * config/nvptx/team.c (__nvptx_omp_num_threads): New global variable in shared memory.
Diffstat (limited to 'gcc/omp-low.cc')
-rw-r--r-- gcc/omp-low.cc 150
1 files changed, 135 insertions, 15 deletions
diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc
index bb4d148..9a569df 100644
--- a/gcc/omp-low.cc
+++ b/gcc/omp-low.cc
@@ -187,6 +187,10 @@ struct omp_context
than teams is strictly nested in it. */
bool nonteams_nested_p;
+ /* True if this context is in OMPACC mode.  Set when the internal
+ OMP_CLAUSE__OMPACC_ clause is removed from the construct.  */
+ bool ompacc_p;
+
/* Candidates for adjusting OpenACC privatization level. */
vec<tree> oacc_privatization_candidates;
@@ -2039,6 +2043,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_TASK_REDUCTION:
case OMP_CLAUSE_ALLOCATE:
case OMP_CLAUSE_ALLOCATOR:
+ case OMP_CLAUSE__OMPACC_:
break;
case OMP_CLAUSE_ALIGNED:
@@ -2263,6 +2268,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx,
case OMP_CLAUSE_FILTER:
case OMP_CLAUSE__CONDTEMP_:
case OMP_CLAUSE_ALLOCATOR:
+ case OMP_CLAUSE__OMPACC_:
break;
case OMP_CLAUSE__CACHE_:
@@ -2332,6 +2338,21 @@ omp_maybe_offloaded_ctx (omp_context *ctx)
return false;
}
+static bool
+ompacc_ctx_p (omp_context *ctx)
+{ /* Return true if CTX is to be treated as being in OMPACC mode.  That is
+     the case when the function currently being processed is offloadable
+     and carries the "ompacc" attribute, or when the first offloaded
+     construct found by walking from CTX outwards is marked as OMPACC.
+     Return false otherwise, including when CTX is NULL or no enclosing
+     offloaded construct is found.  */
+ if (cgraph_node::get (current_function_decl)->offloadable /* NOTE(review):
+     assumes cgraph_node::get does not return NULL for the current
+     function here -- confirm.  */
+ && lookup_attribute ("ompacc",
+ DECL_ATTRIBUTES (current_function_decl)))
+ return true;
+ for (; ctx; ctx = ctx->outer) /* Check CTX itself first, then each outer
+     context in turn.  */
+ if (is_gimple_omp_offloaded (ctx->stmt))
+ return (ctx->ompacc_p /* Only the first offloaded context found decides
+     the result; the walk does not continue past it.  The flag is set by
+     scan_omp_target when it unlinks the internal clause, so the clause
+     lookup below covers the time before that removal.  */
+ || omp_find_clause (gimple_omp_target_clauses (ctx->stmt),
+ OMP_CLAUSE__OMPACC_));
+ return false;
+}
+
/* Build a decl for the omp child function. It'll not contain a body
yet, just the bare decl. */
@@ -2641,8 +2662,28 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx)
DECL_NAMELESS (name) = 1;
TYPE_NAME (ctx->record_type) = name;
TYPE_ARTIFICIAL (ctx->record_type) = 1;
- create_omp_child_function (ctx, false);
- gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
+
+ if (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && ompacc_ctx_p (ctx))
+ {
+ tree data_name = get_identifier (".omp_data_i_par");
+ tree t = build_decl (gimple_location (stmt), VAR_DECL, data_name,
+ ptr_type_node);
+ DECL_ARTIFICIAL (t) = 1;
+ DECL_NAMELESS (t) = 1;
+ DECL_CONTEXT (t) = current_function_decl;
+ DECL_SEEN_IN_BIND_EXPR_P (t) = 1;
+ DECL_CHAIN (t) = ctx->block_vars;
+ ctx->block_vars = t;
+ TREE_USED (t) = 1;
+ TREE_READONLY (t) = 1;
+ ctx->receiver_decl = t;
+ }
+ else
+ {
+ create_omp_child_function (ctx, false);
+ gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn);
+ }
scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
@@ -3565,6 +3606,24 @@ scan_omp_target (gomp_target *stmt, omp_context *outer_ctx)
scan_sharing_clauses (clauses, ctx, base_pointers_restrict);
scan_omp (gimple_omp_body_ptr (stmt), ctx);
+ if (offloaded && flag_openmp_target == OMP_TARGET_MODE_OMPACC)
+ {
+ for (tree *cp = gimple_omp_target_clauses_ptr (stmt); *cp;
+ cp = &OMP_CLAUSE_CHAIN (*cp))
+ if (OMP_CLAUSE_CODE (*cp) == OMP_CLAUSE__OMPACC_)
+ {
+ DECL_ATTRIBUTES (gimple_omp_target_child_fn (stmt))
+ = tree_cons (get_identifier ("ompacc"), NULL_TREE,
+ DECL_ATTRIBUTES (gimple_omp_target_child_fn (stmt)));
+ /* Unlink and remove. */
+ *cp = OMP_CLAUSE_CHAIN (*cp);
+
+ /* Set to true. */
+ ctx->ompacc_p = true;
+ break;
+ }
+ }
+
if (TYPE_FIELDS (ctx->record_type) == NULL)
ctx->record_type = ctx->receiver_decl = NULL;
else
@@ -8947,6 +9006,9 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
gcc_unreachable ();
else if (is_oacc_kernels_decomposed_part (tgt))
;
+ else if (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && is_omp_target (tgt->stmt))
+ ;
else
gcc_unreachable ();
@@ -8975,7 +9037,13 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses,
!= GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GRAPHITE);
}
- if (tag & OLF_TILE)
+ if (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL
+ && tgt
+ && ompacc_ctx_p (tgt))
+ levels = 1;
+ else
+ if (tag & OLF_TILE)
/* Tiling could use all 3 levels. */
levels = 3;
else
@@ -12460,6 +12528,23 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
push_gimplify_context ();
+ if (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx))
+ {
+ enum omp_clause_code code = OMP_CLAUSE_ERROR;
+ if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR)
+ code = OMP_CLAUSE_VECTOR;
+ else if (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
+ code = OMP_CLAUSE_GANG;
+ if (code)
+ {
+ /* Adjust into OACC loop kind with vector/gang clause. */
+ gimple_omp_for_set_kind (stmt, GF_OMP_FOR_KIND_OACC_LOOP);
+ tree c = build_omp_clause (UNKNOWN_LOCATION, code);
+ OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (stmt);
+ gimple_omp_for_set_clauses (stmt, c);
+ }
+ }
+
if (is_gimple_omp_oacc (ctx->stmt))
oacc_privatization_scan_clause_chain (ctx, gimple_omp_for_clauses (stmt));
@@ -12481,7 +12566,9 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gbind *inner_bind
= as_a <gbind *> (gimple_seq_first_stmt (omp_for_body));
tree vars = gimple_bind_vars (inner_bind);
- if (is_gimple_omp_oacc (ctx->stmt))
+ if (is_gimple_omp_oacc (ctx->stmt)
+ || (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && ompacc_ctx_p (ctx)))
oacc_privatization_scan_decl_chain (ctx, vars);
gimple_bind_append_vars (new_stmt, vars);
/* bind_vars/BLOCK_VARS are being moved to new_stmt/block, don't
@@ -12597,7 +12684,8 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lower_omp (gimple_omp_body_ptr (stmt), ctx);
gcall *private_marker = NULL;
- if (is_gimple_omp_oacc (ctx->stmt)
+ if ((is_gimple_omp_oacc (ctx->stmt)
+ || (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx)))
&& !gimple_seq_empty_p (omp_for_body))
private_marker = lower_oacc_private_marker (ctx);
@@ -12652,15 +12740,16 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx)
/* Once lowered, extract the bounds and clauses. */
omp_extract_for_data (stmt, &fd, NULL);
- bool oacc_kernels_parloops = false;
- if (param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE_PARLOOPS
- || param_openacc_kernels == OPENACC_KERNELS_PARLOOPS)
- oacc_kernels_parloops = ctx_in_oacc_kernels_region (ctx);
- if (is_gimple_omp_oacc (ctx->stmt) && !oacc_kernels_parloops)
+ if (flag_openacc)
{
- lower_oacc_head_tail (gimple_location (stmt),
- gimple_omp_for_clauses (stmt), private_marker,
- NULL, NULL, &oacc_head, &oacc_tail, ctx);
+ bool oacc_kernels_parloops = false;
+ if (param_openacc_kernels == OPENACC_KERNELS_DECOMPOSE_PARLOOPS
+ || param_openacc_kernels == OPENACC_KERNELS_PARLOOPS)
+ oacc_kernels_parloops = ctx_in_oacc_kernels_region (ctx);
+ if (is_gimple_omp_oacc (ctx->stmt) && !oacc_kernels_parloops)
+ lower_oacc_head_tail (gimple_location (stmt),
+ gimple_omp_for_clauses (stmt), private_marker,
+ NULL, NULL, &oacc_head, &oacc_tail, ctx);
}
/* Add OpenACC partitioning and reduction markers just before the loop. */
@@ -13447,9 +13536,20 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx)
bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
else
bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind));
+
+ gimple_seq oacc_head = NULL, oacc_tail = NULL;
+ if (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && gimple_code (stmt) == GIMPLE_OMP_PARALLEL
+ && ompacc_ctx_p (ctx))
+ lower_oacc_head_tail (gimple_location (stmt), clauses,
+ NULL, NULL, NULL, &oacc_head, &oacc_tail,
+ ctx);
+
gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true);
gimple_bind_add_seq (bind, ilist);
+ gimple_bind_add_seq (bind, oacc_head);
gimple_bind_add_stmt (bind, stmt);
+ gimple_bind_add_seq (bind, oacc_tail);
gimple_bind_add_seq (bind, olist);
pop_gimplify_context (NULL);
@@ -15320,7 +15420,9 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx)
gimple_seq fork_seq = NULL;
gimple_seq join_seq = NULL;
- if (offloaded && is_gimple_omp_oacc (ctx->stmt))
+ if (offloaded && (is_gimple_omp_oacc (ctx->stmt)
+ || (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && ompacc_ctx_p (ctx))))
{
/* If there are reductions on the offloaded region itself, treat
them as a dummy GANG loop. */
@@ -15456,6 +15558,22 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx)
lower_omp (gimple_omp_body_ptr (teams_stmt), ctx);
lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist,
NULL, ctx);
+
+ if (flag_openmp_target == OMP_TARGET_MODE_OMPACC && ompacc_ctx_p (ctx))
+ {
+ /* Forward the team/gang-wide variables to outer target region. */
+ struct omp_context *tgt = ctx;
+ while (tgt && !is_gimple_omp_offloaded (tgt->stmt))
+ tgt = tgt->outer;
+ if (tgt)
+ {
+ int i;
+ tree decl;
+ FOR_EACH_VEC_ELT (ctx->oacc_privatization_candidates, i, decl)
+ tgt->oacc_privatization_candidates.safe_push (decl);
+ }
+ }
+
gimple_seq_add_stmt (&bind_body, teams_stmt);
gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt));
@@ -15620,7 +15738,9 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx)
ctx);
break;
case GIMPLE_BIND:
- if (ctx && is_gimple_omp_oacc (ctx->stmt))
+ if (ctx && (is_gimple_omp_oacc (ctx->stmt)
+ || (flag_openmp_target == OMP_TARGET_MODE_OMPACC
+ && ompacc_ctx_p (ctx))))
{
tree vars = gimple_bind_vars (as_a <gbind *> (stmt));
oacc_privatization_scan_decl_chain (ctx, vars);