34 files changed, 2629 insertions, 943 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 8da8006..30f66bd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,157 @@ +2006-01-19 Diego Novillo <dnovillo@redhat.com> + + * tree-pretty-print.c (dump_generic_node): Handle + OMP_PARALLEL_FN, OMP_PARALLEL_DATA_ARG and OMP_RETURN_EXPR. + * cgraph.c (cgraph_expand_queue): Rename from + cgraph_analyze_queue. Update all users. + * cgraphunit.c (cgraph_assemble_pending_functions): Process + cgraph_expand_queue. + (cgraph_expand_all_functions): Likewise. + (cgraph_finalize_pending_functions): Remove. Update callers. + + * tree.h (OMP_DIRECTIVE_P): Define. + (OMP_PARALLEL_FN): Define. + (OMP_PARALLEL_DATA_ARG): Define. + (OMP_SECTIONS_SECTIONS): Define. + * tree-pass.h (pass_expand_omp): Declare. + * omp-low.c (struct omp_region): Declare. + (struct omp_context): Remove fields 'parallel_type', + 'parallel_start_ix' and 'parallel_start_additional_args'. + Update all users. + (struct omp_for_data): Rename from struct expand_omp_for_data. + (omp_regions): New static variable. + (root_omp_region): New static variable. + (find_omp_clause): Make static. + (is_in_combined_parallel_ctx): Remove. + (is_combined_parallel): New. + (extract_omp_for_data): Move earlier in the file. + (workshare_safe_to_combine_p): New. + (get_ws_args_for): New. + (determine_parallel_type): Move earlier in the file. + (omp_copy_decl_2): Do not set DECL_CONTEXT of new local to the + child function. + (omp_copy_decl): Likewise. + (create_omp_child_function): Likewise. + (lookup_omp_region): New. + (dump_omp_region): New. + (debug_omp_region): New. + (debug_all_omp_regions): New. + (new_omp_region): New. + (scan_omp_parallel): If parallel_nesting_level > 1, the + directive is nested within another parallel directive. + Set OMP_PARALLEL_FN. + (scan_omp_for): Do not try to handle combined parallel+for + cases. + Remove FIXME comment. + (scan_omp_nested): Remove. + (scan_omp_1): Do not call scan_omp_nested when + parallel_nesting_level is > 1. + Do not change the DECL_CONTEXT of local variables found. + (lookup_decl_in_outer_ctx): New. + (lower_rec_input_clauses): Rename from expand_rec_input_clauses. + (lower_lastprivate_clauses): Rename from expand_lastprivate_clauses. + (lower_reduction_clauses): Rename from expand_reduction_clauses. + (lower_copyprivate_clauses): Rename from expand_copyprivate_clauses. + If CTX is nested, lookup VAR in the outer context when + building copy assignment. + (lower_send_clauses): Rename from expand_send_clauses. + If CTX is nested, lookup VAR in the outer context when + building copy assignments. + (lower_send_shared_vars): Rename from expand_send_shared_vars. + If CTX is nested, lookup VAR in the outer context when + building copy assignments. + (expand_parallel_call): Rename from build_parallel_call. + Handle combined parallel+workshare cases. + Re-implement to emit code into the CFG. + (list2chain): New. + (expand_omp_parallel): Re-implement to emit code into the CFG. + Call move_sese_region_to_fn to outline the sub-graph + containing the parallel region. + (expand_omp_for_1): Remove. + (expand_omp_for_generic): Re-implement to emit code into the + CFG. + (expand_omp_for_static_nochunk): Likewise. + (expand_omp_for_static_chunk): Likewise. + (expand_omp_for): Likewise. + (expand_omp_sections): Likewise. + (remove_exit_barriers): New. + (expand_omp_synch): New. + (expand_omp): New. + (build_omp_regions_1): New. + (build_omp_regions): New. + (execute_expand_omp): New. + (gate_expand_omp): New. + (pass_expand_omp): Define. 
+ (lower_omp_sections): Rename from expand_omp_sections. + Set OMP_SECTIONS_SECTIONS. + (lower_omp_single_simple): Rename from expand_omp_single_simple. + (lower_omp_single_copy): Rename from expand_omp_single_copy. + (lower_omp_single): Rename from expand_omp_single. + (lower_omp_master): Rename from expand_omp_master. + (lower_omp_ordered): Rename from expand_omp_ordered. + (lower_omp_critical): Rename from expand_omp_critical. + (lower_omp_for_lastprivate): Rename from expand_omp_for_lastprivate. + (lower_omp_for): Re-implement. + (lower_omp_parallel): Re-implement. + (lower_regimplify): Rename from expand_regimplify. + (lower_omp_1): Rename from expand_omp_1. + If there are syntax errors in the program, replace every + OpenMP directive with NOP. + Call lower_omp_* instead of expand_omp_*. + (lower_omp): Rename from expand_omp. + + * tree-gimple.c (is_gimple_stmt): Handle OMP_RETURN_EXPR. + * tree-gimple.h (enum omp_parallel_type): Remove. + (gimple_boolify): Declare extern. + (find_omp_clause, determine_parallel_type): Remove. + + * gimple-low.c (lower_omp_directive): New. + (lower_stmt): Call it. + (record_vars_into): Move from ... + (record_vars): ... here. + Call record_vars_into with current_function_decl. + + * gimplify.c (struct gimplify_ctx): Remove fields + combined_pre_p and combined_ctxp. Update users. + (get_formal_tmp_var): Add documentation. + (gimple_boolify): Make extern. + (gimplify_expr_in_ctx): Remove. Update callers. + (gimplify_omp_parallel): Do not assume that OMP_PARALLEL_BODY + will always be a BIND_EXPR. + (gimplify_expr): Handle OMP_RETURN_EXPR. + * tree.def (BLOCK): Remove documentation about BLOCK_TYPE_TAGS. + (OMP_PARALLEL): Add 3 operands. + (OMP_SECTIONS): Add 1 operand. + (OMP_RETURN_EXPR): Define. + + * tree-inline.c (estimate_num_insns_1): Handle OpenMP directives. + (copy_tree_r): Restore TREE_CHAIN in OMP_CLAUSE_*. + * tree-iterator.c (alloc_stmt_list): Assert that we are not + creating a circular free list. + (free_stmt_list): Assert that we are not freeing stmt_list_cache. + + * tree-flow.h (move_sese_region_to_fn): Declare. + (record_vars_into): Declare. + * tree-cfg.c (make_omp_sections_edges): New. + (make_exit_edges): Handle OMP_PARALLEL, OMP_FOR, OMP_SINGLE, + OMP_MASTER, OMP_ORDERED, OMP_CRITICAL, OMP_RETURN_EXPR, + OMP_SECTIONS and OMP_SECTION. + (is_ctrl_altering_stmt): Return true for OMP_DIRECTIVE_P. + (set_bb_for_stmt): Undo change to check currently_expanding_to_rtl. + (verify_stmt): Do not handle OMP_DIRECTIVE_P. + (gather_blocks_in_sese_region): New. + (struct move_stmt_d): Declare. + (move_stmt_r): New. + (move_block_to_fn): New. + (move_sese_region_to_fn): New. + + * passes.c (init_optimization_passes): Schedule + pass_expand_omp after pass_init_datastructures. + + * tree-ssa-operands.c (get_expr_operands): Handle + OMP_PARALLEL, OMP_SECTIONS, OMP_FOR, OMP_RETURN_EXPR, + OMP_SINGLE, OMP_MASTER, OMP_ORDERED, OMP_CRITICAL. + 2006-01-19 Jeff Law <law@redhat.com> * tree-vrp.c (extract_range_from_assert): Refine the result range diff --git a/gcc/cgraph.c b/gcc/cgraph.c index 4e4add8..a05f76d 100644 --- a/gcc/cgraph.c +++ b/gcc/cgraph.c @@ -113,8 +113,10 @@ struct cgraph_node *cgraph_nodes; /* Queue of cgraph nodes scheduled to be lowered. */ struct cgraph_node *cgraph_nodes_queue; -/* Queue of cgraph nodes scheduled to be analyzed. */ -struct cgraph_node *cgraph_analyze_queue; +/* Queue of cgraph nodes scheduled to be expanded.
This is a + secondary queue used during optimization to accommodate passes that + may generate new functions that need to be optimized and expanded. */ +struct cgraph_node *cgraph_expand_queue; /* Number of nodes in existence. */ int cgraph_n_nodes; @@ -1095,19 +1097,23 @@ cgraph_variable_initializer_availability (struct cgraph_varpool_node *node) } -/* Add the function FNDECL to the call graph. This assumes that the - body of FNDECL is in GENERIC form and ready to be processed by - cgraph_finalize_function. */ +/* Add the function FNDECL to the call graph. FNDECL is assumed to be + in low GIMPLE form and ready to be processed by cgraph_finalize_function. + + When operating in unit-at-a-time, a new callgraph node is added to + CGRAPH_EXPAND_QUEUE, which is processed after all the original + functions in the call graph. + + When not in unit-at-a-time, the new callgraph node is added to + CGRAPH_NODES_QUEUE for cgraph_assemble_pending_functions to + process. */ void cgraph_add_new_function (tree fndecl) { - /* We're called while lowering another function. We can't do anything - at this time without recursing. Which would cause a GC at an - inappropriate time. */ struct cgraph_node *n = cgraph_node (fndecl); - n->next_needed = cgraph_analyze_queue; - cgraph_analyze_queue = n; + n->next_needed = cgraph_expand_queue; + cgraph_expand_queue = n; } #include "gt-cgraph.h" diff --git a/gcc/cgraph.h b/gcc/cgraph.h index 600b00e..6e60f8c 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -152,7 +152,7 @@ struct cgraph_node GTY((chain_next ("%h.next"), chain_prev ("%h.previous"))) /* Set when function is reachable by call from other function that is either reachable or needed. */ bool reachable; - /* Set once the function is lowered (ie it's CFG is built). */ + /* Set once the function is lowered (i.e. its CFG is built). */ bool lowered; /* Set once the function has been instantiated and its callee lists created. */ @@ -239,7 +239,7 @@ extern GTY(()) int cgraph_max_uid; extern bool cgraph_global_info_ready; extern bool cgraph_function_flags_ready; extern GTY(()) struct cgraph_node *cgraph_nodes_queue; -extern GTY(()) struct cgraph_node *cgraph_analyze_queue; +extern GTY(()) struct cgraph_node *cgraph_expand_queue; extern GTY(()) struct cgraph_varpool_node *cgraph_varpool_first_unanalyzed_node; extern GTY(()) struct cgraph_varpool_node *cgraph_varpool_nodes_queue; diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 995bcb9..2b79429 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -353,8 +353,22 @@ cgraph_assemble_pending_functions (void) } } + /* Process CGRAPH_EXPAND_QUEUE; these are functions created during + the expansion process. Note that this queue may grow as it is + being processed, as the new functions may generate new ones. */ + while (cgraph_expand_queue) + { + struct cgraph_node *n = cgraph_expand_queue; + cgraph_expand_queue = cgraph_expand_queue->next_needed; + n->next_needed = NULL; + cgraph_finalize_function (n->decl, false); + output = true; + } + return output; } + + /* As a GCC extension we allow redefinition of the function. The semantics when both copies of bodies differ are not well defined.
We replace the old body with new body so in unit at a time mode @@ -418,20 +432,6 @@ cgraph_lower_function (struct cgraph_node *node) node->lowered = true; } -static void -cgraph_finalize_pending_functions (void) -{ - struct cgraph_node *next, *node = cgraph_analyze_queue; - - cgraph_analyze_queue = NULL; - for (; node ; node = next) - { - next = node->next_needed; - node->next_needed = NULL; - cgraph_finalize_function (node->decl, true); - } -} - /* DECL has been parsed. Take it, queue it, compile it at the whim of the logic in effect. If NESTED is true, then our caller cannot stand to have the garbage collector run at the moment. We would need to either create @@ -458,7 +458,6 @@ cgraph_finalize_function (tree decl, bool nested) if (!flag_unit_at_a_time) { cgraph_analyze_function (node); - cgraph_finalize_pending_functions (); cgraph_decide_inlining_incrementally (node, false); } @@ -982,7 +981,6 @@ cgraph_finalize_compilation_unit (void) gcc_assert (DECL_SAVED_TREE (decl)); cgraph_analyze_function (node); - cgraph_finalize_pending_functions (); for (edge = node->callees; edge; edge = edge->next_callee) if (!edge->callee->reachable) @@ -1166,7 +1164,21 @@ cgraph_expand_all_functions (void) cgraph_expand_function (node); } } + free (order); + + /* Process CGRAPH_EXPAND_QUEUE; these are functions created during + the expansion process. Note that this queue may grow as it is + being processed, as the new functions may generate new ones. */ + while (cgraph_expand_queue) + { + node = cgraph_expand_queue; + cgraph_expand_queue = cgraph_expand_queue->next_needed; + node->next_needed = NULL; + node->output = 0; + node->lowered = DECL_STRUCT_FUNCTION (node->decl)->cfg != NULL; + cgraph_expand_function (node); + } } /* This is used to sort the node types by the cgraph order number. */ diff --git a/gcc/gimple-low.c b/gcc/gimple-low.c index 2a49eed..8b2581b 100644 --- a/gcc/gimple-low.c +++ b/gcc/gimple-low.c @@ -151,6 +151,32 @@ lower_stmt_body (tree expr, struct lower_data *data) lower_stmt (&tsi, data); } + +/* Lower the OpenMP directive statement pointed to by TSI. DATA is + passed through the recursion. */ + +static void +lower_omp_directive (tree_stmt_iterator *tsi, struct lower_data *data) +{ + tree clause, stmt; + + stmt = tsi_stmt (*tsi); + + clause = (TREE_CODE (stmt) >= OMP_PARALLEL && TREE_CODE (stmt) <= OMP_SINGLE) + ? OMP_CLAUSES (stmt) + : NULL_TREE; + + for (; clause; clause = OMP_CLAUSE_CHAIN (clause)) + TREE_BLOCK (clause) = TREE_BLOCK (stmt); + + lower_stmt_body (OMP_BODY (stmt), data); + tsi_link_before (tsi, stmt, TSI_SAME_STMT); + tsi_link_before (tsi, OMP_BODY (stmt), TSI_SAME_STMT); + OMP_BODY (stmt) = NULL_TREE; + tsi_delink (tsi); +} + + /* Lowers statement TSI. DATA is passed through the recursion. */ static void @@ -192,8 +218,20 @@ lower_stmt (tree_stmt_iterator *tsi, struct lower_data *data) case GOTO_EXPR: case LABEL_EXPR: case SWITCH_EXPR: + case OMP_RETURN_EXPR: break; + case OMP_PARALLEL: + case OMP_FOR: + case OMP_SECTIONS: + case OMP_SECTION: + case OMP_SINGLE: + case OMP_MASTER: + case OMP_ORDERED: + case OMP_CRITICAL: + lower_omp_directive (tsi, data); + return; + default: gcc_unreachable (); } @@ -503,11 +541,16 @@ lower_return_expr (tree_stmt_iterator *tsi, struct lower_data *data) } -/* Record the variables in VARS. */ +/* Record the variables in VARS into function FN.
*/ void -record_vars (tree vars) +record_vars_into (tree vars, tree fn) { + struct function *saved_cfun = cfun; + + if (fn != current_function_decl) + cfun = DECL_STRUCT_FUNCTION (fn); + for (; vars; vars = TREE_CHAIN (vars)) { tree var = vars; @@ -516,6 +559,7 @@ record_vars (tree vars) we don't need to care about. */ if (TREE_CODE (var) != VAR_DECL) continue; + /* Nothing to do in this case. */ if (DECL_EXTERNAL (var)) continue; @@ -524,6 +568,18 @@ record_vars (tree vars) cfun->unexpanded_var_list = tree_cons (NULL_TREE, var, cfun->unexpanded_var_list); } + + if (fn != current_function_decl) + cfun = saved_cfun; +} + + +/* Record the variables in VARS into current_function_decl. */ + +void +record_vars (tree vars) +{ + record_vars_into (vars, current_function_decl); } diff --git a/gcc/gimplify.c b/gcc/gimplify.c index acd0468..7d7b80f 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -92,16 +92,6 @@ struct gimplify_ctx int conditions; bool save_stack; bool into_ssa; - - /* When gimplifying combined omp parallel directives (omp parallel - loop and omp parallel sections), any prefix code needed to setup - the associated worksharing construct needs to be emitted in the - pre-queue of its parent parallel, otherwise the lowering process - will move that code to the child function. Similarly, we need to - move up to the gimplification context of the parent parallel - directive so temporaries are declared in the right context. */ - tree *combined_pre_p; - struct gimplify_ctx *combined_ctxp; }; static struct gimplify_ctx *gimplify_ctxp; @@ -634,6 +624,10 @@ internal_get_tmp_var (tree val, tree *pre_p, tree *post_p, bool is_formal) return t; } +/* Returns a formal temporary variable initialized with VAL. PRE_P + points to a statement list where side-effects needed to compute VAL + should be stored. */ + tree get_formal_tmp_var (tree val, tree *pre_p) { @@ -2297,7 +2291,7 @@ shortcut_cond_expr (tree expr) /* EXPR is used in a boolean context; make sure it has BOOLEAN_TYPE. */ -static tree +tree gimple_boolify (tree expr) { tree type = TREE_TYPE (expr); @@ -4131,29 +4125,6 @@ gimplify_to_stmt_list (tree *stmt_p) } } -/* Gimplify *EXPR_P as if it had been used inside the gimplification - context CTX_P. The other arguments are as in gimplify_expr. */ - -static enum gimplify_status -gimplify_expr_in_ctx (tree *expr_p, tree *pre_p, tree *post_p, - bool (* gimple_test_f) (tree), fallback_t fallback, - struct gimplify_ctx *ctx_p, - struct gimplify_omp_ctx *omp_ctx_p) -{ - enum gimplify_status ret; - struct gimplify_ctx *prev_ctxp; - struct gimplify_omp_ctx *prev_omp_ctxp; - - prev_ctxp = gimplify_ctxp; - gimplify_ctxp = ctx_p; - prev_omp_ctxp = gimplify_omp_ctxp; - gimplify_omp_ctxp = omp_ctx_p; - ret = gimplify_expr (expr_p, pre_p, post_p, gimple_test_f, fallback); - gimplify_ctxp = prev_ctxp; - gimplify_omp_ctxp = prev_omp_ctxp; - - return ret; -} /* Add FIRSTPRIVATE entries for DECL in the surrounding OpenMP parallels, up to CTX. If entries already exist, force them to be some flavor of private.
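An aside on the record_vars_into change above: its cfun save/restore dance is the idiom this patch leans on whenever it must operate on a function other than the one currently being compiled (create_omp_child_function restores cfun the same way after allocate_struct_function). A minimal sketch of the idiom in isolation; the helper name and body are hypothetical, for illustration only:

/* Hypothetical helper: do some per-function bookkeeping in FN.
   Temporarily point CFUN at FN's struct function so that all
   cfun-relative accesses update FN, then restore the old value.  */
static void
update_function_state (tree fn)
{
  struct function *saved_cfun = cfun;

  if (fn != current_function_decl)
    cfun = DECL_STRUCT_FUNCTION (fn);

  /* ... read or update cfun fields here, e.g.
     cfun->unexpanded_var_list ...  */

  if (fn != current_function_decl)
    cfun = saved_cfun;
}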
@@ -4531,19 +4502,6 @@ gimplify_scan_omp_clauses (tree *list_p, tree *pre_p, bool in_parallel) break; case OMP_CLAUSE_SCHEDULE: - if (gimplify_ctxp->combined_pre_p) - { - gcc_assert (gimplify_omp_ctxp == outer_ctx); - gs = gimplify_expr_in_ctx (&OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (c), - gimplify_ctxp->combined_pre_p, NULL, - is_gimple_val, fb_rvalue, - gimplify_ctxp->combined_ctxp, - outer_ctx->outer_context); - if (gs == GS_ERROR) - remove = true; - break; - } - /* FALLTHRU */ case OMP_CLAUSE_IF: case OMP_CLAUSE_NUM_THREADS: gs = gimplify_expr (&TREE_OPERAND (c, 0), pre_p, NULL, @@ -4708,17 +4666,12 @@ gimplify_omp_parallel (tree *expr_p, tree *pre_p) push_gimplify_context (); - if (determine_parallel_type (expr) == IS_COMBINED_PARALLEL) - { - gimplify_ctxp->combined_pre_p = pre_p; - gimplify_ctxp->combined_ctxp = gimplify_ctxp->prev_context; - } - gimplify_stmt (&OMP_PARALLEL_BODY (expr)); - pop_gimplify_context (OMP_PARALLEL_BODY (expr)); - gimplify_ctxp->combined_pre_p = NULL; - gimplify_ctxp->combined_ctxp = NULL; + if (TREE_CODE (OMP_PARALLEL_BODY (expr)) == BIND_EXPR) + pop_gimplify_context (OMP_PARALLEL_BODY (expr)); + else + pop_gimplify_context (NULL_TREE); gimplify_adjust_omp_clauses (&OMP_PARALLEL_CLAUSES (expr)); @@ -4732,13 +4685,9 @@ gimplify_omp_for (tree *expr_p, tree *pre_p) { tree for_stmt, decl, t; enum gimplify_status ret = 0; - struct gimplify_omp_ctx *outer_combined_omp_ctxp = NULL; for_stmt = *expr_p; - if (gimplify_ctxp->combined_pre_p) - outer_combined_omp_ctxp = gimplify_omp_ctxp->outer_context; - gimplify_scan_omp_clauses (&OMP_FOR_CLAUSES (for_stmt), pre_p, false); t = OMP_FOR_INIT (for_stmt); @@ -4754,33 +4703,15 @@ gimplify_omp_for (tree *expr_p, tree *pre_p) else omp_add_variable (gimplify_omp_ctxp, decl, GOVD_PRIVATE | GOVD_SEEN); - /* Gimplify inside our parent's context if this is part of a combined - parallel+workshare directive. */ - if (gimplify_ctxp->combined_pre_p) - ret |= gimplify_expr_in_ctx (&TREE_OPERAND (t, 1), - gimplify_ctxp->combined_pre_p, NULL, - is_gimple_val, fb_rvalue, - gimplify_ctxp->combined_ctxp, - outer_combined_omp_ctxp); - else - ret |= gimplify_expr (&TREE_OPERAND (t, 1), &OMP_FOR_PRE_BODY (for_stmt), - NULL, is_gimple_val, fb_rvalue); + ret |= gimplify_expr (&TREE_OPERAND (t, 1), &OMP_FOR_PRE_BODY (for_stmt), + NULL, is_gimple_val, fb_rvalue); t = OMP_FOR_COND (for_stmt); gcc_assert (COMPARISON_CLASS_P (t)); gcc_assert (TREE_OPERAND (t, 0) == decl); - /* Gimplify inside our parent's context if this is part of a combined - parallel+workshare directive. */ - if (gimplify_ctxp->combined_pre_p) - ret |= gimplify_expr_in_ctx (&TREE_OPERAND (t, 1), - gimplify_ctxp->combined_pre_p, NULL, - is_gimple_val, fb_rvalue, - gimplify_ctxp->combined_ctxp, - outer_combined_omp_ctxp); - else - ret |= gimplify_expr (&TREE_OPERAND (t, 1), &OMP_FOR_PRE_BODY (for_stmt), - NULL, is_gimple_val, fb_rvalue); + ret |= gimplify_expr (&TREE_OPERAND (t, 1), &OMP_FOR_PRE_BODY (for_stmt), + NULL, is_gimple_val, fb_rvalue); t = OMP_FOR_INCR (for_stmt); switch (TREE_CODE (t)) @@ -4818,18 +4749,8 @@ gimplify_omp_for (tree *expr_p, tree *pre_p) gcc_unreachable (); } - /* Gimplify inside our parent's context if this is part of a - combined parallel+workshare directive. 
*/ - if (gimplify_ctxp->combined_pre_p) - ret |= gimplify_expr_in_ctx (&TREE_OPERAND (t, 1), - gimplify_ctxp->combined_pre_p, NULL, - is_gimple_val, fb_rvalue, - gimplify_ctxp->combined_ctxp, - outer_combined_omp_ctxp); - else - ret |= gimplify_expr (&TREE_OPERAND (t, 1), - &OMP_FOR_PRE_BODY (for_stmt), NULL, - is_gimple_val, fb_rvalue); + ret |= gimplify_expr (&TREE_OPERAND (t, 1), &OMP_FOR_PRE_BODY (for_stmt), + NULL, is_gimple_val, fb_rvalue); break; default: @@ -5622,6 +5543,10 @@ gimplify_expr (tree *expr_p, tree *pre_p, tree *post_p, ret = gimplify_omp_atomic (expr_p, pre_p); break; + case OMP_RETURN_EXPR: + ret = GS_ALL_DONE; + break; + default: switch (TREE_CODE_CLASS (TREE_CODE (*expr_p))) { diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 65907f0..f5bdcb9 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -59,6 +59,40 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA optimal, but lexically nested parallels effectively only happen in test suites. */ +/* Parallel region information. Every parallel and workshare + directive is enclosed between two markers, the OMP_* directive + and a corresponding OMP_RETURN_EXPR statement. */ + +struct omp_region GTY(()) +{ + /* The enclosing region. */ + struct omp_region *outer; + + /* First child region. */ + struct omp_region *inner; + + /* Next peer region. */ + struct omp_region *next; + + /* Entry point to this region. */ + tree entry; + + /* Exit label from this region. */ + tree exit; + + /* Region number. */ + int num; + + /* True if this is a combined parallel+workshare region. */ + bool is_combined_parallel; + + /* If this is a combined parallel+workshare region, this is a list + of additional arguments needed by the combined parallel+workshare + library call. */ + tree ws_args; +}; + + /* Context structure. Used to store information about each parallel directive in the code. */ @@ -94,48 +128,34 @@ typedef struct omp_context reserved for the main body of the function. */ int depth; - /* Type of parallel construct. Used to distinguish regular parallel - regions from combined parallel+workshare directives (parallel, - parallel loop and parallel sections). */ - enum omp_parallel_type parallel_type; - /* True if this parallel directive is nested within another. */ bool is_nested; - - /* For combined parallel constructs, the built-in index for the - library call used to launch the children threads. */ - int parallel_start_ix; - - /* If the combined parallel directive needs additional arguments for - the call to GOMP_parallel_start_foo, they are added here. */ - tree parallel_start_additional_args; } omp_context; -/* A structure describing the main elements of a parallel loop. - Mostly used to communicate between the various subroutines of - expand_omp_for_1. */ +/* A structure describing the main elements of a parallel loop. */ -struct expand_omp_for_data +struct omp_for_data { tree v, n1, n2, step, chunk_size, for_stmt; enum tree_code cond_code; tree pre; - omp_context *ctx; bool have_nowait, have_ordered; enum omp_clause_schedule_kind sched_kind; }; + static splay_tree all_contexts; static int parallel_nesting_level; +static splay_tree omp_regions; +static struct omp_region *root_omp_region; static void scan_omp (tree *, omp_context *); -static void expand_omp (tree *, omp_context *); - +static void lower_omp (tree *, omp_context *); /* Find an OpenMP clause of type KIND within CLAUSES.
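For instance, find_omp_clause (OMP_FOR_CLAUSES (stmt), OMP_CLAUSE_SCHEDULE) returns the schedule clause of an OMP_FOR when one is present and NULL_TREE otherwise; determine_parallel_type below relies on exactly this to reject static and ordered schedules.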
*/ -tree +static tree find_omp_clause (tree clauses, enum tree_code kind) { for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) @@ -150,17 +170,290 @@ find_omp_clause (tree clauses, enum tree_code kind) static inline bool is_parallel_ctx (omp_context *ctx) { - return ctx->parallel_type != IS_NOT_PARALLEL; + return TREE_CODE (ctx->stmt) == OMP_PARALLEL; } -/* Return true if CTX is inside a combined omp parallel + workshare. */ + +/* Return true if REGION is a combined parallel+workshare region. */ static inline bool -is_in_combined_parallel_ctx (omp_context *ctx) +is_combined_parallel (struct omp_region *region) +{ + return region->is_combined_parallel; +} + + +/* Extract the header elements of parallel loop FOR_STMT and store + them into *FD. */ + +static void +extract_omp_for_data (tree for_stmt, struct omp_for_data *fd) +{ + tree t; + + fd->for_stmt = for_stmt; + fd->pre = NULL; + + t = OMP_FOR_INIT (for_stmt); + gcc_assert (TREE_CODE (t) == MODIFY_EXPR); + fd->v = TREE_OPERAND (t, 0); + gcc_assert (DECL_P (fd->v)); + gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE); + fd->n1 = TREE_OPERAND (t, 1); + + t = OMP_FOR_COND (for_stmt); + fd->cond_code = TREE_CODE (t); + gcc_assert (TREE_OPERAND (t, 0) == fd->v); + fd->n2 = TREE_OPERAND (t, 1); + switch (fd->cond_code) + { + case LT_EXPR: + case GT_EXPR: + break; + case LE_EXPR: + fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2, + build_int_cst (TREE_TYPE (fd->n2), 1)); + fd->cond_code = LT_EXPR; + break; + case GE_EXPR: + fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2, + build_int_cst (TREE_TYPE (fd->n2), 1)); + fd->cond_code = GT_EXPR; + break; + default: + gcc_unreachable (); + } + + t = OMP_FOR_INCR (fd->for_stmt); + gcc_assert (TREE_CODE (t) == MODIFY_EXPR); + gcc_assert (TREE_OPERAND (t, 0) == fd->v); + t = TREE_OPERAND (t, 1); + gcc_assert (TREE_OPERAND (t, 0) == fd->v); + switch (TREE_CODE (t)) + { + case PLUS_EXPR: + fd->step = TREE_OPERAND (t, 1); + break; + case MINUS_EXPR: + fd->step = TREE_OPERAND (t, 1); + fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step); + break; + default: + gcc_unreachable (); + } + + fd->have_nowait = fd->have_ordered = false; + fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; + fd->chunk_size = NULL_TREE; + + for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) + switch (TREE_CODE (t)) + { + case OMP_CLAUSE_NOWAIT: + fd->have_nowait = true; + break; + case OMP_CLAUSE_ORDERED: + fd->have_ordered = true; + break; + case OMP_CLAUSE_SCHEDULE: + fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t); + fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); + break; + default: + break; + } + + if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) + gcc_assert (fd->chunk_size == NULL); + else if (fd->chunk_size == NULL) + { + /* We only need to compute a default chunk size for ordered + static loops and dynamic loops. */ + if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered) + fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) + ? integer_zero_node : integer_one_node; + } +} + + +/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB + is the immediate dominator of PAR_ENTRY_BB, return true if there + are no data dependencies that would prevent expanding the parallel + directive at PAR_ENTRY_BB as a combined parallel+workshare region. + + When expanding a combined parallel+workshare region, the call to + the child function may need additional arguments in the case of + OMP_FOR regions. 
In some cases, these arguments are computed out + of variables passed in from the parent to the child via 'struct + .omp_data_s'. For instance: + + #pragma omp parallel for schedule (guided, i * 4) + for (j ...) + + Is lowered into: + + # BLOCK 2 (PAR_ENTRY_BB) + .omp_data_o.i = i; + #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) + + # BLOCK 3 (WS_ENTRY_BB) + .omp_data_i = &.omp_data_o; + D.1667 = .omp_data_i->i; + D.1598 = D.1667 * 4; + #pragma omp for schedule (guided, D.1598) + + When we outline the parallel region, the call to the child function + 'bar.omp_fn.0' will need the value D.1598 in its argument list, but + that value is computed *after* the call site. So, in principle we + cannot do the transformation. + + To see whether the code in WS_ENTRY_BB blocks the combined + parallel+workshare call, we collect all the variables used in the + OMP_FOR header and check whether they appear on the LHS of any + statement in WS_ENTRY_BB. If so, then we cannot emit the combined + call. + + FIXME. If we had the SSA form built at this point, we could merely + hoist the code in block 3 into block 2 and be done with it. But at + this point we don't have dataflow information and though we could + hack something up here, it is really not worth the aggravation. */ + +static bool +workshare_safe_to_combine_p (basic_block par_entry_bb, basic_block ws_entry_bb) { - return ctx->outer && ctx->outer->parallel_type == IS_COMBINED_PARALLEL; + struct omp_for_data fd; + tree par_stmt, ws_stmt; + + par_stmt = last_stmt (par_entry_bb); + ws_stmt = last_stmt (ws_entry_bb); + + if (TREE_CODE (ws_stmt) == OMP_SECTIONS) + return true; + + gcc_assert (TREE_CODE (ws_stmt) == OMP_FOR); + + extract_omp_for_data (ws_stmt, &fd); + + /* FIXME. We give up too easily here. If any of these arguments + are not constants, they will likely involve variables that have + been mapped into fields of .omp_data_s for sharing with the child + function. With appropriate data flow, it would be possible to + see through this. */ + if (!is_gimple_min_invariant (fd.n1) + || !is_gimple_min_invariant (fd.n2) + || !is_gimple_min_invariant (fd.step) + || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) + return false; + + return true; } + +/* Collect additional arguments needed to emit a combined + parallel+workshare call. WS_STMT is the workshare directive being + expanded. */ + +static tree +get_ws_args_for (tree ws_stmt) +{ + tree t; + + if (TREE_CODE (ws_stmt) == OMP_FOR) + { + struct omp_for_data fd; + tree ws_args; + + extract_omp_for_data (ws_stmt, &fd); + + ws_args = NULL_TREE; + if (fd.chunk_size) + { + t = fold_convert (long_integer_type_node, fd.chunk_size); + ws_args = tree_cons (NULL, t, ws_args); + } + + t = fold_convert (long_integer_type_node, fd.step); + ws_args = tree_cons (NULL, t, ws_args); + + t = fold_convert (long_integer_type_node, fd.n2); + ws_args = tree_cons (NULL, t, ws_args); + + t = fold_convert (long_integer_type_node, fd.n1); + ws_args = tree_cons (NULL, t, ws_args); + + return ws_args; + } + else if (TREE_CODE (ws_stmt) == OMP_SECTIONS) + { + basic_block bb = bb_for_stmt (ws_stmt); + t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs)); + t = tree_cons (NULL, t, NULL); + return t; + } + + gcc_unreachable (); +} + + /* Discover whether REGION is a combined parallel+workshare region.
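For instance, a #pragma omp parallel whose body is a single #pragma omp for with a dynamic, guided or runtime schedule and invariant bounds can be marked combined, allowing expansion to emit a single GOMP_parallel_loop_*_start call instead of a plain GOMP_parallel_start with a separate loop-start call inside the child function.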
*/ + +static void +determine_parallel_type (struct omp_region *region) +{ + basic_block par_entry_bb, par_exit_bb; + basic_block ws_entry_bb, ws_exit_bb; + + if (region == NULL || region->inner == NULL) + return; + + /* We only support parallel+for and parallel+sections. */ + if (TREE_CODE (region->entry) != OMP_PARALLEL + || (TREE_CODE (region->inner->entry) != OMP_FOR + && TREE_CODE (region->inner->entry) != OMP_SECTIONS)) + return; + + /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and + WS_EXIT_BB -> PAR_EXIT_BB. */ + par_entry_bb = bb_for_stmt (region->entry); + par_exit_bb = bb_for_stmt (region->exit); + + ws_entry_bb = bb_for_stmt (region->inner->entry); + ws_exit_bb = bb_for_stmt (region->inner->exit); + + if (single_succ (par_entry_bb) == ws_entry_bb + && single_succ (ws_exit_bb) == par_exit_bb + && workshare_safe_to_combine_p (par_entry_bb, ws_entry_bb)) + { + if (TREE_CODE (region->inner->entry) == OMP_FOR) + { + /* If this is a combined parallel loop, we need to determine + whether or not to use the combined library calls. There + are two cases where we do not apply the transformation: + static loops and any kind of ordered loop. In the first + case, we already open code the loop so there is no need + to do anything else. In the latter case, the combined + parallel loop call would still need extra synchronization + to implement ordered semantics, so there would not be any + gain in using the combined call. */ + tree clauses = OMP_FOR_CLAUSES (region->inner->entry); + tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE); + if (c == NULL + || OMP_CLAUSE_SCHEDULE_KIND (c) == OMP_CLAUSE_SCHEDULE_STATIC + || find_omp_clause (clauses, OMP_CLAUSE_ORDERED)) + { + region->is_combined_parallel = false; + region->inner->is_combined_parallel = false; + return; + } + } + + region->is_combined_parallel = true; + region->inner->is_combined_parallel = true; + region->ws_args = get_ws_args_for (region->inner->entry); + } +} + + /* Return true if EXPR is variable sized. */ static inline bool @@ -261,7 +554,7 @@ omp_copy_decl_2 (tree var, tree name, tree type, omp_context *ctx) DECL_ARTIFICIAL (copy) = DECL_ARTIFICIAL (var); DECL_IGNORED_P (copy) = DECL_IGNORED_P (var); TREE_USED (copy) = 1; - DECL_CONTEXT (copy) = ctx->cb.dst_fn; + DECL_CONTEXT (copy) = current_function_decl; DECL_SEEN_IN_BIND_EXPR_P (copy) = 1; TREE_CHAIN (copy) = ctx->block_vars; @@ -426,7 +719,7 @@ omp_copy_decl (tree var, copy_body_data *cb) if (TREE_CODE (var) == LABEL_DECL) { new_var = create_artificial_label (); - DECL_CONTEXT (new_var) = ctx->cb.dst_fn; + DECL_CONTEXT (new_var) = current_function_decl; insert_decl_map (&ctx->cb, var, new_var); return new_var; } @@ -444,6 +737,99 @@ omp_copy_decl (tree var, copy_body_data *cb) return error_mark_node; } + +/* Return the parallel region associated with STMT. */ + +static inline struct omp_region * +lookup_omp_region (tree stmt) +{ + splay_tree_node n = splay_tree_lookup (omp_regions, (splay_tree_key) stmt); + return n ? (struct omp_region *) n->value : NULL; +} + + +/* Debugging dumps for parallel regions. */ +void dump_omp_region (FILE *, struct omp_region *, int); +void debug_omp_region (struct omp_region *); +void debug_all_omp_regions (void); + +/* Dump the parallel region tree rooted at REGION. 
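Like GCC's other debug_* entry points, the debug_omp_region and debug_all_omp_regions wrappers below write to stderr so they can be called by hand from the debugger.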
*/ + +void +dump_omp_region (FILE *file, struct omp_region *region, int indent) +{ + fprintf (file, "%*s", indent, ""); + print_generic_stmt (file, region->entry, TDF_SLIM); + + if (region->inner) + dump_omp_region (file, region->inner, indent + 4); + + fprintf (file, "%*s", indent, ""); + if (region->exit) + print_generic_stmt (file, region->exit, 0); + else + fprintf (file, "[no exit marker]\n"); + + if (region->next) + { + fprintf (file, "\n"); + dump_omp_region (file, region->next, indent); + } +} + +void +debug_omp_region (struct omp_region *region) +{ + dump_omp_region (stderr, region, 0); +} + +void +debug_all_omp_regions (void) +{ + dump_omp_region (stderr, root_omp_region, 0); +} + + +/* Create a new parallel region starting at STMT inside region PARENT. */ + +static struct omp_region * +new_omp_region (tree stmt, struct omp_region *parent) +{ + struct omp_region *region = ggc_alloc_cleared (sizeof (*region)); + static int num = 0; + + region->outer = parent; + region->entry = stmt; + region->num = num++; + + if (parent) + { + /* This is a nested region. Add it to the list of inner + regions in PARENT. */ + region->next = parent->inner; + parent->inner = region; + } + else if (omp_regions) + { + /* This is a toplevel region. Add it to the list of toplevel + regions in ROOT_OMP_REGION. */ + region->next = root_omp_region; + root_omp_region = region; + } + else + { + /* Create a new root region with the first region we find. */ + root_omp_region = region; + omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0); + } + + splay_tree_insert (omp_regions, (splay_tree_key) stmt, + (splay_tree_value) region); + + return region; +} + + /* Create a new context, with OUTER_CTX being the surrounding context. */ static omp_context * @@ -742,6 +1128,7 @@ create_omp_child_function (omp_context *ctx) DECL_UNINLINABLE (decl) = 1; DECL_EXTERNAL (decl) = 0; DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); t = build_decl (RESULT_DECL, NULL_TREE, void_type_node); DECL_ARTIFICIAL (t) = 1; @@ -751,13 +1138,13 @@ create_omp_child_function (omp_context *ctx) t = build_decl (PARM_DECL, get_identifier (".omp_data_i"), ptr_type_node); DECL_ARTIFICIAL (t) = 1; DECL_ARG_TYPE (t) = ptr_type_node; - DECL_CONTEXT (t) = decl; + DECL_CONTEXT (t) = current_function_decl; TREE_USED (t) = 1; DECL_ARGUMENTS (decl) = t; ctx->receiver_decl = t; /* Allocate memory for the function structure. The call to - allocate_struct_function clobbers cfun, so we need to restore + allocate_struct_function clobbers CFUN, so we need to restore it afterward. */ allocate_struct_function (decl); DECL_SOURCE_LOCATION (decl) = EXPR_LOCATION (ctx->stmt); @@ -765,30 +1152,6 @@ create_omp_child_function (omp_context *ctx) cfun = ctx->cb.src_cfun; } -/* Given an OMP_PARALLEL statement, determine whether it is a combined - parallel+worksharing directive. This is simply done by examining - the body of the directive. If the body contains a single OMP_FOR - or a single OMP_SECTIONS then this is a combined directive. - Otherwise, it is a regular parallel directive. */ - -enum omp_parallel_type -determine_parallel_type (tree stmt) -{ - enum omp_parallel_type par_type; - tree body = BIND_EXPR_BODY (OMP_PARALLEL_BODY (stmt)); - tree t; - - par_type = IS_PARALLEL; - - t = expr_only (body); - if (t && TREE_CODE (t) == OMP_SECTIONS) - par_type = IS_COMBINED_PARALLEL; - else - par_type = IS_PARALLEL; - - return par_type; -} - /* Scan an OpenMP parallel directive. 
*/ @@ -809,16 +1172,16 @@ scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx) } ctx = new_omp_context (*stmt_p, outer_ctx); + if (parallel_nesting_level > 1) + ctx->is_nested = true; ctx->field_map = splay_tree_new (splay_tree_compare_pointers, 0, 0); - ctx->parallel_type = determine_parallel_type (*stmt_p); ctx->default_kind = OMP_CLAUSE_DEFAULT_SHARED; ctx->record_type = lang_hooks.types.make_type (RECORD_TYPE); - ctx->parallel_start_ix = BUILT_IN_GOMP_PARALLEL_START; - ctx->parallel_start_additional_args = NULL_TREE; name = create_tmp_var_name (".omp_data_s"); name = build_decl (TYPE_DECL, name, ctx->record_type); TYPE_NAME (ctx->record_type) = name; create_omp_child_function (ctx); + OMP_PARALLEL_FN (*stmt_p) = ctx->cb.dst_fn; scan_sharing_clauses (OMP_PARALLEL_CLAUSES (*stmt_p), ctx); scan_omp (&OMP_PARALLEL_BODY (*stmt_p), ctx); @@ -833,143 +1196,19 @@ scan_omp_parallel (tree *stmt_p, omp_context *outer_ctx) } -/* Extract the header elements of parallel loop FOR_STMT and store - them into *FD. */ - -static void -extract_omp_for_data (tree for_stmt, omp_context *ctx, - struct expand_omp_for_data *fd) -{ - tree t; - - fd->for_stmt = for_stmt; - fd->pre = NULL; - fd->ctx = ctx; - - t = OMP_FOR_INIT (for_stmt); - gcc_assert (TREE_CODE (t) == MODIFY_EXPR); - fd->v = TREE_OPERAND (t, 0); - gcc_assert (DECL_P (fd->v)); - gcc_assert (TREE_CODE (TREE_TYPE (fd->v)) == INTEGER_TYPE); - fd->n1 = TREE_OPERAND (t, 1); - - t = OMP_FOR_COND (for_stmt); - fd->cond_code = TREE_CODE (t); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - fd->n2 = TREE_OPERAND (t, 1); - switch (fd->cond_code) - { - case LT_EXPR: - case GT_EXPR: - break; - case LE_EXPR: - fd->n2 = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->n2), fd->n2, - build_int_cst (TREE_TYPE (fd->n2), 1)); - fd->cond_code = LT_EXPR; - break; - case GE_EXPR: - fd->n2 = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->n2), fd->n2, - build_int_cst (TREE_TYPE (fd->n2), 1)); - fd->cond_code = GT_EXPR; - break; - default: - gcc_unreachable (); - } - - t = OMP_FOR_INCR (fd->for_stmt); - gcc_assert (TREE_CODE (t) == MODIFY_EXPR); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - t = TREE_OPERAND (t, 1); - gcc_assert (TREE_OPERAND (t, 0) == fd->v); - switch (TREE_CODE (t)) - { - case PLUS_EXPR: - fd->step = TREE_OPERAND (t, 1); - break; - case MINUS_EXPR: - fd->step = TREE_OPERAND (t, 1); - fd->step = fold_build1 (NEGATE_EXPR, TREE_TYPE (fd->step), fd->step); - break; - default: - gcc_unreachable (); - } - - fd->have_nowait = fd->have_ordered = false; - fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; - fd->chunk_size = NULL_TREE; - - for (t = OMP_FOR_CLAUSES (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) - switch (TREE_CODE (t)) - { - case OMP_CLAUSE_NOWAIT: - fd->have_nowait = true; - break; - case OMP_CLAUSE_ORDERED: - fd->have_ordered = true; - break; - case OMP_CLAUSE_SCHEDULE: - fd->sched_kind = OMP_CLAUSE_SCHEDULE_KIND (t); - fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); - break; - default: - break; - } - - if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) - gcc_assert (fd->chunk_size == NULL); - else if (fd->chunk_size == NULL) - { - /* We only need to compute a default chunk size for ordered - static loops and dynamic loops. */ - if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC || fd->have_ordered) - fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) - ? integer_zero_node : integer_one_node; - } -} - - /* Scan an OpenMP loop directive. 
*/ static void scan_omp_for (tree *stmt_p, omp_context *outer_ctx) { omp_context *ctx; - tree stmt = *stmt_p; + tree stmt; + stmt = *stmt_p; ctx = new_omp_context (stmt, outer_ctx); - /* If this is a combined parallel loop directive, we need to extract - the bounds, step and chunk size for the loop so that we can build - the call to GOMP_parallel_loop_foo_start. Do this before - scanning the loop header to avoid getting the mapped variables - from the child context. */ - if (is_in_combined_parallel_ctx (ctx)) - { - struct expand_omp_for_data fd; - tree t, additional_args; - - extract_omp_for_data (stmt, ctx, &fd); - - additional_args = NULL_TREE; - if (fd.chunk_size) - { - t = fold_convert (long_integer_type_node, fd.chunk_size); - additional_args = tree_cons (NULL, t, additional_args); - } - t = fold_convert (long_integer_type_node, fd.step); - additional_args = tree_cons (NULL, t, additional_args); - t = fold_convert (long_integer_type_node, fd.n2); - additional_args = tree_cons (NULL, t, additional_args); - t = fold_convert (long_integer_type_node, fd.n1); - additional_args = tree_cons (NULL, t, additional_args); - outer_ctx->parallel_start_additional_args = additional_args; - } - scan_sharing_clauses (OMP_FOR_CLAUSES (stmt), ctx); - /* FIXME. When expanding into a combined parallel loop, we may not - need to map some of the variables in the loop header (in - particular, FD.N1 and FD.N2 for dynamic loops). */ scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx); scan_omp (&OMP_FOR_INIT (stmt), ctx); scan_omp (&OMP_FOR_COND (stmt), ctx); @@ -982,9 +1221,10 @@ scan_omp_for (tree *stmt_p, omp_context *outer_ctx) static void scan_omp_sections (tree *stmt_p, omp_context *outer_ctx) { - tree stmt = *stmt_p; + tree stmt; omp_context *ctx; + stmt = *stmt_p; ctx = new_omp_context (stmt, outer_ctx); scan_sharing_clauses (OMP_SECTIONS_CLAUSES (stmt), ctx); scan_omp (&OMP_SECTIONS_BODY (stmt), ctx); @@ -1015,79 +1255,6 @@ scan_omp_single (tree *stmt_p, omp_context *outer_ctx) layout_type (ctx->record_type); } -/* Similar, except this is either a parallel nested within another - parallel, or a workshare construct nested within a nested parallel. - In this case we want to do minimal processing, as the real work - will be done during lowering of the function generated by the - outermost parallel. - - The minimal amount of work is processing private clauses, and simply - scanning the rest. Private clauses are the only ones that don't - also imply a reference in the outer parallel. We must set up a - translation lest the default behaviour in omp_copy_decl substitute - error_mark_node. 
*/ - -static void -scan_omp_nested (tree *stmt_p, omp_context *outer_ctx) -{ - omp_context *ctx; - tree var_sized_list = NULL; - tree c, decl, stmt = *stmt_p; - - ctx = new_omp_context (stmt, outer_ctx); - ctx->is_nested = true; - - for (c = OMP_CLAUSES (stmt); c ; c = OMP_CLAUSE_CHAIN (c)) - { - switch (TREE_CODE (c)) - { - case OMP_CLAUSE_PRIVATE: - decl = OMP_CLAUSE_DECL (c); - if (is_variable_sized (decl)) - var_sized_list = tree_cons (NULL, c, var_sized_list); - OMP_CLAUSE_DECL (c) = install_var_local (decl, ctx); - break; - - case OMP_CLAUSE_FIRSTPRIVATE: - case OMP_CLAUSE_LASTPRIVATE: - case OMP_CLAUSE_REDUCTION: - case OMP_CLAUSE_SHARED: - case OMP_CLAUSE_COPYPRIVATE: - case OMP_CLAUSE_IF: - case OMP_CLAUSE_NUM_THREADS: - case OMP_CLAUSE_SCHEDULE: - scan_omp (&TREE_OPERAND (c, 0), ctx->outer); - break; - - case OMP_CLAUSE_COPYIN: - case OMP_CLAUSE_NOWAIT: - case OMP_CLAUSE_ORDERED: - case OMP_CLAUSE_DEFAULT: - break; - - default: - gcc_unreachable (); - } - } - - /* Instantiate the VALUE_EXPR for variable sized variables. We have - to do this as a separate pass, since we need the pointer and size - decls installed first. */ - for (c = var_sized_list; c ; c = TREE_CHAIN (c)) - fixup_remapped_decl (OMP_CLAUSE_DECL (TREE_VALUE (c)), ctx, - OMP_CLAUSE_PRIVATE_DEBUG (TREE_VALUE (c))); - - scan_omp (&OMP_BODY (stmt), ctx); - - if (TREE_CODE (stmt) == OMP_FOR) - { - scan_omp (&OMP_FOR_PRE_BODY (stmt), ctx); - scan_omp (&OMP_FOR_INIT (stmt), ctx); - scan_omp (&OMP_FOR_COND (stmt), ctx); - scan_omp (&OMP_FOR_INCR (stmt), ctx); - } -} - /* Callback for walk_stmts used to scan for OpenMP directives at TP. */ @@ -1105,32 +1272,21 @@ scan_omp_1 (tree *tp, int *walk_subtrees, void *data) switch (TREE_CODE (t)) { case OMP_PARALLEL: - if (++parallel_nesting_level == 1) - scan_omp_parallel (tp, ctx); - else - scan_omp_nested (tp, ctx); + parallel_nesting_level++; + scan_omp_parallel (tp, ctx); parallel_nesting_level--; break; case OMP_FOR: - if (parallel_nesting_level <= 1) - scan_omp_for (tp, ctx); - else - scan_omp_nested (tp, ctx); + scan_omp_for (tp, ctx); break; case OMP_SECTIONS: - if (parallel_nesting_level <= 1) - scan_omp_sections (tp, ctx); - else - scan_omp_nested (tp, ctx); + scan_omp_sections (tp, ctx); break; case OMP_SINGLE: - if (parallel_nesting_level <= 1) - scan_omp_single (tp, ctx); - else - scan_omp_nested (tp, ctx); + scan_omp_single (tp, ctx); break; case OMP_SECTION: @@ -1147,11 +1303,7 @@ scan_omp_1 (tree *tp, int *walk_subtrees, void *data) *walk_subtrees = 1; for (var = BIND_EXPR_VARS (t); var ; var = TREE_CHAIN (var)) - { - if (DECL_CONTEXT (var) == ctx->cb.src_fn) - DECL_CONTEXT (var) = ctx->cb.dst_fn; - insert_decl_map (&ctx->cb, var, var); - } + insert_decl_map (&ctx->cb, var, var); } break; @@ -1219,6 +1371,73 @@ maybe_lookup_ctx (tree stmt) return n ? (omp_context *) n->value : NULL; } + +/* Find the mapping for DECL in CTX or the immediately enclosing + context that has a mapping for DECL. + + If CTX is a nested parallel directive, we may have to use the decl + mappings created in CTX's parent context. 
Suppose that we have the + following parallel nesting (variable UIDs shown for clarity): + + iD.1562 = 0; + #omp parallel shared(iD.1562) -> outer parallel + iD.1562 = iD.1562 + 1; + + #omp parallel shared (iD.1562) -> inner parallel + iD.1562 = iD.1562 - 1; + + Each parallel structure will create a distinct .omp_data_s structure + for copying iD.1562 in/out of the directive: + + outer parallel .omp_data_s.1.i -> iD.1562 + inner parallel .omp_data_s.2.i -> iD.1562 + + A shared variable mapping will produce a copy-out operation before + the parallel directive and a copy-in operation after it. So, in + this case we would have: + + iD.1562 = 0; + .omp_data_o.1.i = iD.1562; + #omp parallel shared(iD.1562) -> outer parallel + .omp_data_i.1 = &.omp_data_o.1 + .omp_data_i.1->i = .omp_data_i.1->i + 1; + + .omp_data_o.2.i = iD.1562; -> ** + #omp parallel shared(iD.1562) -> inner parallel + .omp_data_i.2 = &.omp_data_o.2 + .omp_data_i.2->i = .omp_data_i.2->i - 1; + + + ** This is a problem. The symbol iD.1562 cannot be referenced + inside the body of the outer parallel region. But since we are + emitting this copy operation while expanding the inner parallel + directive, we need to access the CTX structure of the outer + parallel directive to get the correct mapping: + + .omp_data_o.2.i = .omp_data_i.1->i + + Since there may be other workshare or parallel directives enclosing + the parallel directive, it may be necessary to walk up the context + parent chain. This is not a problem in general because nested + parallelism happens only rarely. */ + +static tree +lookup_decl_in_outer_ctx (tree decl, omp_context *ctx) +{ + tree t; + omp_context *up; + + gcc_assert (ctx->is_nested); + + for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer) + t = maybe_lookup_decl (decl, up); + + gcc_assert (t); + + return t; +} + + /* Construct the initialization value for reduction CLAUSE. */ tree @@ -1291,7 +1510,7 @@ omp_reduction_init (tree clause, tree type) to destructors go in DLIST. */ static void -expand_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, +lower_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, omp_context *ctx) { tree_stmt_iterator diter; @@ -1340,11 +1559,11 @@ expand_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, if (pass != 0) continue; } - /* For variable sized types, we need to allocate the actual - storage here. Call alloca and store the result in the pointer - decl that we created elsewhere. */ else if (is_variable_sized (var)) { + /* For variable sized types, we need to allocate the + actual storage here. Call alloca and store the + result in the pointer decl that we created elsewhere. */ if (pass == 0) continue; @@ -1361,14 +1580,15 @@ expand_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, x = build2 (MODIFY_EXPR, void_type_node, ptr, x); gimplify_and_add (x, ilist); } - /* For references that are being privatized for Fortran, allocate - new backing storage for the new pointer variable. This allows - us to avoid changing all the code that expects a pointer to - something that expects a direct variable. Note that this - doesn't apply to C++, since reference types are disallowed in - data sharing clauses there. */ else if (is_reference (var)) { + /* For references that are being privatized for Fortran, + allocate new backing storage for the new pointer + variable. This allows us to avoid changing all the + code that expects a pointer to something that expects + a direct variable.
Note that this doesn't apply to + C++, since reference types are disallowed in data + sharing clauses there. */ if (pass == 0) continue; @@ -1501,12 +1721,13 @@ expand_rec_input_clauses (tree clauses, tree *ilist, tree *dlist, build_omp_barrier (ilist); } + /* Generate code to implement the LASTPRIVATE clauses. This is used for both parallel and workshare constructs. PREDICATE may be NULL if it's always true. */ static void -expand_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list, +lower_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list, omp_context *ctx) { tree sub_list, x, c; @@ -1554,13 +1775,15 @@ expand_lastprivate_clauses (tree clauses, tree predicate, tree *stmt_list, x = build3 (COND_EXPR, void_type_node, predicate, sub_list, NULL); else x = sub_list; + gimplify_and_add (x, stmt_list); } + /* Generate code to implement the REDUCTION clauses. */ static void -expand_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx) +lower_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx) { tree sub_list = NULL, x, c; int count = 0; @@ -1596,8 +1819,9 @@ expand_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx) new_var = build_fold_indirect_ref (new_var); ref = build_outer_var_ref (var, ctx); code = OMP_CLAUSE_REDUCTION_CODE (c); - /* reduction(-:var) sums up the partial results, so it acts identically - to reduction(+:var). */ + + /* reduction(-:var) sums up the partial results, so it acts + identically to reduction(+:var). */ if (code == MINUS_EXPR) code = PLUS_EXPR; @@ -1645,10 +1869,11 @@ expand_reduction_clauses (tree clauses, tree *stmt_list, omp_context *ctx) gimplify_and_add (x, stmt_list); } + /* Generate code to implement the COPYPRIVATE clauses. */ static void -expand_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, +lower_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, omp_context *ctx) { tree c; @@ -1665,7 +1890,8 @@ expand_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, by_ref = use_pointer_for_field (var, false); ref = build_sender_ref (var, ctx); - x = by_ref ? build_fold_addr_expr (var) : var; + x = (ctx->is_nested) ? lookup_decl_in_outer_ctx (var, ctx) : var; + x = by_ref ? build_fold_addr_expr (x) : x; x = build2 (MODIFY_EXPR, void_type_node, ref, x); gimplify_and_add (x, slist); @@ -1680,17 +1906,18 @@ expand_copyprivate_clauses (tree clauses, tree *slist, tree *rlist, } } + /* Generate code to implement the clauses, FIRSTPRIVATE, COPYIN, LASTPRIVATE, and REDUCTION from the sender (aka parent) side. */ static void -expand_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) +lower_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) { tree c; for (c = clauses; c ; c = OMP_CLAUSE_CHAIN (c)) { - tree val, ref, x; + tree val, ref, x, var; bool by_ref, do_in = false, do_out = false; switch (TREE_CODE (c)) @@ -1704,7 +1931,10 @@ expand_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) continue; } - val = OMP_CLAUSE_DECL (c); + var = val = OMP_CLAUSE_DECL (c); + if (ctx->is_nested) + var = lookup_decl_in_outer_ctx (val, ctx); + if (is_variable_sized (val)) continue; by_ref = use_pointer_for_field (val, false); @@ -1739,14 +1969,15 @@ expand_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) if (do_in) { ref = build_sender_ref (val, ctx); - x = by_ref ? build_fold_addr_expr (val) : val; + x = by_ref ? 
build_fold_addr_expr (var) : var; x = build2 (MODIFY_EXPR, void_type_node, ref, x); gimplify_and_add (x, ilist); } + if (do_out) { ref = build_sender_ref (val, ctx); - x = build2 (MODIFY_EXPR, void_type_node, val, ref); + x = build2 (MODIFY_EXPR, void_type_node, var, ref); gimplify_and_add (x, olist); } } @@ -1757,13 +1988,13 @@ expand_send_clauses (tree clauses, tree *ilist, tree *olist, omp_context *ctx) got automatically shared. */ static void -expand_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx) +lower_send_shared_vars (tree *ilist, tree *olist, omp_context *ctx) { - tree ovar, nvar, f, x; + tree var, ovar, nvar, f, x; if (ctx->record_type == NULL) return; - + for (f = TYPE_FIELDS (ctx->record_type); f ; f = TREE_CHAIN (f)) { ovar = DECL_ABSTRACT_ORIGIN (f); nvar = maybe_lookup_decl (ovar, ctx); if (!nvar || !DECL_HAS_VALUE_EXPR_P (nvar)) continue; + var = ovar; + + /* If CTX is a nested parallel directive, find the immediately + enclosing parallel or workshare construct that contains a + mapping for OVAR. */ + if (ctx->is_nested) + var = lookup_decl_in_outer_ctx (ovar, ctx); + if (use_pointer_for_field (ovar, true)) { x = build_sender_ref (ovar, ctx); - ovar = build_fold_addr_expr (ovar); - x = build2 (MODIFY_EXPR, void_type_node, x, ovar); + var = build_fold_addr_expr (var); + x = build2 (MODIFY_EXPR, void_type_node, x, var); gimplify_and_add (x, ilist); } else { x = build_sender_ref (ovar, ctx); - x = build2 (MODIFY_EXPR, void_type_node, x, ovar); + x = build2 (MODIFY_EXPR, void_type_node, x, var); gimplify_and_add (x, ilist); x = build_sender_ref (ovar, ctx); - x = build2 (MODIFY_EXPR, void_type_node, ovar, x); + x = build2 (MODIFY_EXPR, void_type_node, var, x); gimplify_and_add (x, olist); } } } /* Build the function calls to GOMP_parallel_start etc to actually - generate the parallel operation. */ + generate the parallel operation. REGION is the parallel region + being expanded. BB is the block where to insert the code. WS_ARGS + will be set if this is a call to a combined parallel+workshare + construct; it contains the list of additional arguments needed by + the workshare construct. */ static void -build_parallel_call (tree clauses, tree *stmt_list, omp_context *ctx) +expand_parallel_call (struct omp_region *region, basic_block bb, tree ws_args) { - tree t, args, val, cond, c; + tree t, args, val, cond, c, list, clauses; + block_stmt_iterator si; + int start_ix; + + clauses = OMP_PARALLEL_CLAUSES (region->entry); + push_gimplify_context (); + + /* Determine what flavour of GOMP_parallel_start we will be + emitting. */ + start_ix = BUILT_IN_GOMP_PARALLEL_START; + if (is_combined_parallel (region)) + { + tree stmt = region->inner->entry; + + if (TREE_CODE (stmt) == OMP_FOR) + { + struct omp_for_data fd; + extract_omp_for_data (stmt, &fd); + start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START + fd.sched_kind; + } + else if (TREE_CODE (stmt) == OMP_SECTIONS) + start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START; + else + gcc_unreachable (); + } /* By default, the value of NUM_THREADS is zero (selected at run time) and there is no conditional. */ @@ -1819,43 +2086,103 @@ build_parallel_call (tree clauses, tree *stmt_list, omp_context *ctx) (cond != 0) or (cond ? val : 1u).
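For example, given a hypothetical #pragma omp parallel if (flag) num_threads (4), VAL is 4 and COND is flag, so the thread-count argument passed to the runtime becomes flag ? 4 : 1, i.e. one thread when the conditional fails and four otherwise.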
*/ if (cond) { + block_stmt_iterator si; + + cond = gimple_boolify (cond); + if (integer_zerop (val)) val = build2 (EQ_EXPR, unsigned_type_node, cond, build_int_cst (TREE_TYPE (cond), 0)); else - val = build3 (COND_EXPR, unsigned_type_node, cond, val, - build_int_cst (unsigned_type_node, 1)); + { + basic_block cond_bb, then_bb, else_bb; + edge e; + tree t, then_lab, else_lab, tmp; + + tmp = create_tmp_var (TREE_TYPE (val), NULL); + e = split_block (bb, NULL); + cond_bb = e->src; + bb = e->dest; + remove_edge (e); + + then_bb = create_empty_bb (cond_bb); + else_bb = create_empty_bb (then_bb); + then_lab = create_artificial_label (); + else_lab = create_artificial_label (); + + t = build3 (COND_EXPR, void_type_node, + cond, + build_and_jump (&then_lab), + build_and_jump (&else_lab)); + + si = bsi_start (cond_bb); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + si = bsi_start (then_bb); + t = build1 (LABEL_EXPR, void_type_node, then_lab); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + t = build2 (MODIFY_EXPR, void_type_node, tmp, val); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + si = bsi_start (else_bb); + t = build1 (LABEL_EXPR, void_type_node, else_lab); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + t = build2 (MODIFY_EXPR, void_type_node, tmp, + build_int_cst (unsigned_type_node, 1)); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); + make_edge (then_bb, bb, EDGE_FALLTHRU); + make_edge (else_bb, bb, EDGE_FALLTHRU); + + val = tmp; + } + + list = NULL_TREE; + val = get_formal_tmp_var (val, &list); + si = bsi_start (bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); } + list = NULL_TREE; args = tree_cons (NULL, val, NULL); - t = ctx->sender_decl; + t = OMP_PARALLEL_DATA_ARG (region->entry); if (t == NULL) t = null_pointer_node; else t = build_fold_addr_expr (t); args = tree_cons (NULL, t, args); - t = build_fold_addr_expr (ctx->cb.dst_fn); + t = build_fold_addr_expr (OMP_PARALLEL_FN (region->entry)); args = tree_cons (NULL, t, args); - if (ctx->parallel_start_additional_args) - args = chainon (args, ctx->parallel_start_additional_args); - t = built_in_decls[ctx->parallel_start_ix]; + + if (ws_args) + args = chainon (args, ws_args); + + t = built_in_decls[start_ix]; t = build_function_call_expr (t, args); - gimplify_and_add (t, stmt_list); + gimplify_and_add (t, &list); - t = ctx->sender_decl; + t = OMP_PARALLEL_DATA_ARG (region->entry); if (t == NULL) t = null_pointer_node; else t = build_fold_addr_expr (t); args = tree_cons (NULL, t, NULL); - t = build_function_call_expr (ctx->cb.dst_fn, args); - gimplify_and_add (t, stmt_list); + t = build_function_call_expr (OMP_PARALLEL_FN (region->entry), args); + gimplify_and_add (t, &list); t = built_in_decls[BUILT_IN_GOMP_PARALLEL_END]; t = build_function_call_expr (t, NULL); - gimplify_and_add (t, stmt_list); + gimplify_and_add (t, &list); + + si = bsi_last (bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + pop_gimplify_context (NULL_TREE); } + /* If exceptions are enabled, wrap *STMT_P in a MUST_NOT_THROW catch handler. This prevents programs from violating the structured block semantics with throws. */ @@ -1886,112 +2213,185 @@ maybe_catch_exception (tree *stmt_p) append_to_statement_list (t, stmt_p); } +/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ -/* Expand the OpenMP parallel directive pointed to by STMT_P. CTX - holds context information for *STMT_P. 
Expansion proceeds in - two main phases: - - (1) The body of the parallel is expanded in-situ. - All the input and reduction clauses are expanded (from the - child's perspective). The body of the parallel is then - inserted as the body of CTX->CB.DST_FUN (the function spawned - to execute each child thread). - - (2) Back in the original function, the original body of the - directive is replaced with the expansion of clauses (from the - parent's perspective), and the thread library call to launch - all the children threads. */ - -static void -expand_omp_parallel (tree *stmt_p, omp_context *ctx) +static tree +list2chain (tree list) { - tree clauses, block, bind, body, olist; - - current_function_decl = ctx->cb.dst_fn; - cfun = DECL_STRUCT_FUNCTION (current_function_decl); - - push_gimplify_context (); - - /* First phase. Expand the body of the children threads, emit - receiving code for data copying clauses. */ - clauses = OMP_PARALLEL_CLAUSES (*stmt_p); - bind = OMP_PARALLEL_BODY (*stmt_p); - block = BIND_EXPR_BLOCK (bind); - body = BIND_EXPR_BODY (bind); - BIND_EXPR_BODY (bind) = alloc_stmt_list (); - - expand_rec_input_clauses (clauses, &BIND_EXPR_BODY (bind), &olist, ctx); - - expand_omp (&body, ctx); - append_to_statement_list (body, &BIND_EXPR_BODY (bind)); + tree t; - expand_reduction_clauses (clauses, &BIND_EXPR_BODY (bind), ctx); - append_to_statement_list (olist, &BIND_EXPR_BODY (bind)); - maybe_catch_exception (&BIND_EXPR_BODY (bind)); + for (t = list; t; t = TREE_CHAIN (t)) + { + tree var = TREE_VALUE (t); + if (TREE_CHAIN (t)) + TREE_CHAIN (var) = TREE_VALUE (TREE_CHAIN (t)); + else + TREE_CHAIN (var) = NULL_TREE; + } - pop_gimplify_context (bind); - BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); - BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + return list ? TREE_VALUE (list) : NULL_TREE; +} - DECL_INITIAL (ctx->cb.dst_fn) = block; - DECL_SAVED_TREE (ctx->cb.dst_fn) = bind; - cgraph_add_new_function (ctx->cb.dst_fn); - current_function_decl = ctx->cb.src_fn; - cfun = DECL_STRUCT_FUNCTION (current_function_decl); +/* Remove barriers in REGION->EXIT's block. Note that this is only + valid for OMP_PARALLEL regions. Since the end of a parallel region + is an implicit barrier, any workshare inside the OMP_PARALLEL that + left a barrier at the end of the OMP_PARALLEL region can now be + removed. */ - block = make_node (BLOCK); - bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); - *stmt_p = bind; +static void +remove_exit_barrier (struct omp_region *region) +{ + block_stmt_iterator si; + basic_block exit_bb; + tree t; - push_gimplify_context (); + gcc_assert (TREE_CODE (region->entry) == OMP_PARALLEL); - /* Second phase. Build the sender decl now that we're in the - correct context. Replace the original body of the directive with - sending code for data copying clauses and the parallel call to - launch children threads. */ - if (ctx->record_type) - ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o"); + exit_bb = bb_for_stmt (region->exit); - olist = NULL; - expand_send_clauses (clauses, &BIND_EXPR_BODY (bind), &olist, ctx); - expand_send_shared_vars (&BIND_EXPR_BODY (bind), &olist, ctx); - build_parallel_call (clauses, &BIND_EXPR_BODY (bind), ctx); - append_to_statement_list (olist, &BIND_EXPR_BODY (bind)); + /* The barrier should be immediately before OMP_RETURN_EXPR. + Otherwise, we cannot remove it. 
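   For example, in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         body (i);
     }

   the implicit barrier of the worksharing loop ends up immediately
   before the OMP_RETURN_EXPR of the parallel region; since
   GOMP_parallel_end already synchronizes the team, that
   GOMP_barrier () call is deleted here.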
*/ + si = bsi_last (exit_bb); + t = bsi_stmt (si); + gcc_assert (TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_prev (&si); + if (bsi_end_p (si)) + return; - pop_gimplify_context (bind); - BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + t = bsi_stmt (si); + if (TREE_CODE (t) == CALL_EXPR + && get_callee_fndecl (t) == built_in_decls[BUILT_IN_GOMP_BARRIER]) + bsi_remove (&si, true); } -/* A subroutine of expand_omp_for_1. Generate code to emit the - for for a lastprivate clause. Given a loop control predicate - of (V cond N2), we gate the clause on (!(V cond N2)). */ + +/* Expand the OpenMP parallel directive starting at REGION. */ static void -expand_omp_for_lastprivate (struct expand_omp_for_data *fd) +expand_omp_parallel (struct omp_region *region) { - tree clauses, cond; - enum tree_code cond_code; - - cond_code = fd->cond_code; - cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR; + basic_block entry_bb, exit_bb, new_bb; + struct function *child_cfun, *saved_cfun; + tree child_fn, block, t, ws_args; + block_stmt_iterator si; + edge e; + + child_fn = OMP_PARALLEL_FN (region->entry); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + saved_cfun = cfun; + + entry_bb = bb_for_stmt (region->entry); + exit_bb = bb_for_stmt (region->exit); + + /* Barriers at the end of the function are not necessary and can be + removed. Since the caller will have a barrier of its own, this + one is superfluous. */ + remove_exit_barrier (region); + + if (is_combined_parallel (region)) + ws_args = region->ws_args; + else + ws_args = NULL_TREE; - /* When possible, use a strict equality expression. This can let VRP - type optimizations deduce the value and remove a copy. */ - if (host_integerp (fd->step, 0)) + if (DECL_STRUCT_FUNCTION (OMP_PARALLEL_FN (region->entry))->cfg) { - HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step); - if (step == 1 || step == -1) - cond_code = EQ_EXPR; + /* Due to inlining, it may happen that we have already outlined + the region, in which case all we need to do is make the + sub-graph unreachable and emit the parallel call. */ + edge entry_succ_e, exit_succ_e; + block_stmt_iterator si; + + entry_succ_e = single_succ_edge (entry_bb); + exit_succ_e = single_succ_edge (exit_bb); + + si = bsi_last (entry_bb); + gcc_assert (!bsi_end_p (si) && TREE_CODE (bsi_stmt (si)) == OMP_PARALLEL); + bsi_remove (&si, true); + + new_bb = entry_bb; + remove_edge (entry_succ_e); + make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); } + else + { + /* If the parallel region needs data sent from the parent + function, then the very first statement of the parallel body + is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since + &.OMP_DATA_O is passed as an argument to the child function, + we need to replace it with the argument as seen by the child + function. + + In most cases, this will end up being the identity assignment + .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had + a function call that has been inlined, the original PARM_DECL + .OMP_DATA_I may have been converted into a different local + variable. In which case, we need to keep the assignment. 
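   Schematically (record and field names are illustrative), before
   outlining the parent and the body look like

     .omp_data_o.i = i;
     __omp_fn.0 (&.omp_data_o);        <-- in the parent

     .omp_data_i = &.omp_data_o;       <-- first statement of the body

   and the rewrite below turns &.omp_data_o into the child's own
   PARM_DECL, so the assignment usually becomes the removable
   identity .omp_data_i = .omp_data_i.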
*/ + if (OMP_PARALLEL_DATA_ARG (region->entry)) + { + basic_block entry_succ_bb = single_succ (entry_bb); + block_stmt_iterator si = bsi_start (entry_succ_bb); + tree stmt; - cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2); + gcc_assert (!bsi_end_p (si)); - clauses = OMP_FOR_CLAUSES (fd->for_stmt); - expand_lastprivate_clauses (clauses, cond, &fd->pre, fd->ctx); + stmt = bsi_stmt (si); + gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR + && TREE_CODE (TREE_OPERAND (stmt, 1)) == ADDR_EXPR + && TREE_OPERAND (TREE_OPERAND (stmt, 1), 0) + == OMP_PARALLEL_DATA_ARG (region->entry)); + + if (TREE_OPERAND (stmt, 0) == DECL_ARGUMENTS (child_fn)) + bsi_remove (&si, true); + else + TREE_OPERAND (stmt, 1) = DECL_ARGUMENTS (child_fn); + } + + /* Declare local variables needed in CHILD_CFUN. */ + block = DECL_INITIAL (child_fn); + BLOCK_VARS (block) = list2chain (child_cfun->unexpanded_var_list); + DECL_SAVED_TREE (child_fn) = single_succ (entry_bb)->stmt_list; + + /* Reset DECL_CONTEXT on locals and function arguments. */ + for (t = BLOCK_VARS (block); t; t = TREE_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + for (t = DECL_ARGUMENTS (child_fn); t; t = TREE_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Split ENTRY_BB at OMP_PARALLEL so that it can be moved to the + child function. */ + si = bsi_last (entry_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_PARALLEL); + bsi_remove (&si, true); + e = split_block (entry_bb, t); + entry_bb = e->dest; + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + /* Move the parallel region into CHILD_CFUN. We need to reset + dominance information because the expansion of the inner + regions has invalidated it. */ + free_dominance_info (CDI_DOMINATORS); + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb); + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + cgraph_add_new_function (child_fn); + + /* Convert OMP_RETURN into a RETURN_EXPR. */ + si = bsi_last (exit_bb); + gcc_assert (!bsi_end_p (si) + && TREE_CODE (bsi_stmt (si)) == OMP_RETURN_EXPR); + t = build1 (RETURN_EXPR, void_type_node, NULL); + bsi_insert_after (&si, t, BSI_SAME_STMT); + bsi_remove (&si, true); + } + + /* Emit a library call to launch the children threads. */ + expand_parallel_call (region, new_bb, ws_args); } -/* A subroutine of expand_omp_for_1. Generate code for a parallel + +/* A subroutine of expand_omp_for. Generate code for a parallel loop with any schedule. Given parameters: @@ -1999,44 +2399,34 @@ expand_omp_for_lastprivate (struct expand_omp_for_data *fd) where COND is "<" or ">", we generate pseudocode more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); - if (more) goto L0; else goto L2; + if (more) goto L0; else goto L3; L0: V = istart0; iend = iend0; L1: BODY; V += STEP; - if (V cond iend) goto L1; - more = GOMP_loop_foo_next (&istart0, &iend0); - if (more) goto L0; - lastprivate; + if (V cond iend) goto L1; else goto L2; L2: + if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; + L3: - If this is a combined omp parallel loop, we can skip the call - to GOMP_loop_foo_start and generate + If this is a combined omp parallel loop, instead of the call to + GOMP_loop_foo_start, we emit 'goto L2', jumping straight to the + GOMP_loop_foo_next call above.
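   As a concrete (illustrative) instance, a loop using
   schedule (dynamic, 4) reaches this routine with
   start_fn = BUILT_IN_GOMP_LOOP_DYNAMIC_START and
   next_fn = BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT, so the first call
   above becomes

     more = GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0);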
*/ - L0: - if (!GOMP_loop_foo_next (&istart0, &iend0)) goto L2; - V = istart0; - iend = iend0; - L1: - BODY; - V += STEP; - if (V cond iend) goto L1; - goto L0; - L2: - lastprivate; -*/ - -static void -expand_omp_for_generic (struct expand_omp_for_data *fd, +static basic_block +expand_omp_for_generic (struct omp_region *region, + struct omp_for_data *fd, enum built_in_function start_fn, enum built_in_function next_fn) { - tree l0, l1, l2; + tree l0, l1, l2, l3; tree type, istart0, iend0, iend; - tree t, args; - bool in_combined_parallel = is_in_combined_parallel_ctx (fd->ctx); + tree t, args, list; + basic_block entry_bb, exit_bb, l0_bb, l1_bb, l2_bb; + edge exit_edge; + block_stmt_iterator si; + bool in_combined_parallel = is_combined_parallel (region); type = TREE_TYPE (fd->v); @@ -2046,25 +2436,22 @@ expand_omp_for_generic (struct expand_omp_for_data *fd, l0 = create_artificial_label (); l1 = create_artificial_label (); l2 = create_artificial_label (); + l3 = create_artificial_label (); iend = create_tmp_var (type, NULL); - /* If this is a combined parallel loop, skip the call to - GOMP_loop_foo_start and call GOMP_loop_foo_next directly. */ - if (in_combined_parallel) - { - t = build1 (LABEL_EXPR, void_type_node, l0); - gimplify_and_add (t, &fd->pre); - t = build_fold_addr_expr (iend0); - args = tree_cons (NULL, t, NULL); - t = build_fold_addr_expr (istart0); - args = tree_cons (NULL, t, args); - t = build_function_call_expr (built_in_decls[next_fn], args); - t = build1 (TRUTH_NOT_EXPR, TREE_TYPE (t), t); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2), NULL); - gimplify_and_add (t, &fd->pre); - } - else + entry_bb = bb_for_stmt (region->entry); + l1_bb = single_succ (entry_bb); + exit_bb = bb_for_stmt (region->exit); + + si = bsi_last (entry_bb); + gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR); + bsi_remove (&si, true); + list = alloc_stmt_list (); + + if (!in_combined_parallel) { + /* If this is not a combined parallel loop, emit a call to + GOMP_loop_foo_start in ENTRY_BB. */ t = build_fold_addr_expr (iend0); args = tree_cons (NULL, t, NULL); t = build_fold_addr_expr (istart0); @@ -2081,61 +2468,109 @@ expand_omp_for_generic (struct expand_omp_for_data *fd, t = fold_convert (long_integer_type_node, fd->n1); args = tree_cons (NULL, t, args); t = build_function_call_expr (built_in_decls[start_fn], args); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l0), build_and_jump (&l2)); - gimplify_and_add (t, &fd->pre); - t = build1 (LABEL_EXPR, void_type_node, l0); - gimplify_and_add (t, &fd->pre); + t = get_formal_tmp_var (t, &list); + t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), + build_and_jump (&l3)); + append_to_statement_list (t, &list); + si = bsi_last (entry_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); } + /* Iteration setup for sequential loop goes in L0_BB. */ + list = alloc_stmt_list (); + t = build1 (LABEL_EXPR, void_type_node, l0); + gimplify_and_add (t, &list); + t = fold_convert (type, istart0); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = fold_convert (type, iend0); t = build2 (MODIFY_EXPR, void_type_node, iend, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); - t = build1 (LABEL_EXPR, void_type_node, l1); - gimplify_and_add (t, &fd->pre); + l0_bb = create_empty_bb (entry_bb); + si = bsi_start (l0_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Loop body goes in L1_BB. 
*/ + list = alloc_stmt_list (); + si = bsi_start (l1_bb); + bsi_insert_before (&si, build1 (LABEL_EXPR, void_type_node, l1), + BSI_CONTINUE_LINKING); - append_to_statement_list (OMP_FOR_BODY (fd->for_stmt), &fd->pre); + /* Code to control the increment and predicate for the sequential + loop goes in the first half of EXIT_BB (we split EXIT_BB so + that we can inherit all the edges going out of the loop + body). */ + list = alloc_stmt_list (); t = build2 (PLUS_EXPR, type, fd->v, fd->step); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = build2 (fd->cond_code, boolean_type_node, fd->v, iend); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), NULL); - gimplify_and_add (t, &fd->pre); + t = get_formal_tmp_var (t, &list); + t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), + build_and_jump (&l2)); + append_to_statement_list (t, &list); + + si = bsi_last (exit_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_remove (&si, true); + exit_edge = split_block (exit_bb, t); + exit_edge->flags = EDGE_FALSE_VALUE; + + si = bsi_last (exit_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Emit code to get the next parallel iteration in L2_BB. */ + list = alloc_stmt_list (); + t = build1 (LABEL_EXPR, void_type_node, l2); + gimplify_and_add (t, &list); - /* If emitting a combined parallel loop, we only need to emit a jump - back to L0 to call GOMP_loop_foo_next again. */ + t = build_fold_addr_expr (iend0); + args = tree_cons (NULL, t, NULL); + t = build_fold_addr_expr (istart0); + args = tree_cons (NULL, t, args); + t = build_function_call_expr (built_in_decls[next_fn], args); + t = get_formal_tmp_var (t, &list); + t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), + build_and_jump (&l3)); + append_to_statement_list (t, &list); + + l2_bb = exit_edge->dest; + si = bsi_start (l2_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Insert exit label on EXIT_EDGE. */ + exit_edge = single_succ_edge (l2_bb); + t = build1 (LABEL_EXPR, void_type_node, l3); + bsi_insert_on_edge_immediate (exit_edge, t); + exit_edge->flags = EDGE_FALSE_VALUE; + + /* Connect the new blocks. */ + remove_edge (single_succ_edge (entry_bb)); if (in_combined_parallel) - { - t = build_and_jump (&l0); - gimplify_and_add (t, &fd->pre); - } + make_edge (entry_bb, l2_bb, EDGE_FALLTHRU); else { - t = build_fold_addr_expr (iend0); - args = tree_cons (NULL, t, NULL); - t = build_fold_addr_expr (istart0); - args = tree_cons (NULL, t, args); - t = build_function_call_expr (built_in_decls[next_fn], args); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l0), NULL); - gimplify_and_add (t, &fd->pre); + make_edge (entry_bb, l0_bb, EDGE_TRUE_VALUE); + make_edge (entry_bb, exit_edge->dest, EDGE_FALSE_VALUE); } - expand_omp_for_lastprivate (fd); - - t = build1 (LABEL_EXPR, void_type_node, l2); - gimplify_and_add (t, &fd->pre); + make_edge (l0_bb, l1_bb, EDGE_FALLTHRU); + make_edge (exit_bb, l1_bb, EDGE_TRUE_VALUE); + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + + return exit_edge->dest; } -/* A subroutine of expand_omp_for_1. Generate code for a parallel - loop with static schedule and no specified chunk size. Given parameters: +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with static schedule and no specified chunk size. 
Given + parameters: for (V = N1; V cond N2; V += STEP) BODY; @@ -2158,15 +2593,18 @@ expand_omp_for_generic (struct expand_omp_for_data *fd, BODY; V += STEP; if (V cond e) goto L1; - lastprivate; L2: */ -static void -expand_omp_for_static_nochunk (struct expand_omp_for_data *fd) +static basic_block +expand_omp_for_static_nochunk (struct omp_region *region, + struct omp_for_data *fd) { tree l0, l1, l2, n, q, s0, e0, e, t, nthreads, threadid; - tree type, utype; + tree type, utype, list; + basic_block entry_bb, exit_bb, seq_start_bb, body_bb, new_exit_bb; + block_stmt_iterator si; + edge exit_edge; l0 = create_artificial_label (); l1 = create_artificial_label (); @@ -2175,27 +2613,33 @@ expand_omp_for_static_nochunk (struct expand_omp_for_data *fd) type = TREE_TYPE (fd->v); utype = lang_hooks.types.unsigned_type (type); + entry_bb = bb_for_stmt (region->entry); + body_bb = single_succ (entry_bb); + exit_bb = bb_for_stmt (region->exit); + + /* Iteration space partitioning goes in ENTRY_BB. */ + list = alloc_stmt_list (); t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS]; t = build_function_call_expr (t, NULL); t = fold_convert (utype, t); - nthreads = get_formal_tmp_var (t, &fd->pre); + nthreads = get_formal_tmp_var (t, &list); t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; t = build_function_call_expr (t, NULL); t = fold_convert (utype, t); - threadid = get_formal_tmp_var (t, &fd->pre); + threadid = get_formal_tmp_var (t, &list); fd->n1 = fold_convert (type, fd->n1); if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &fd->pre); + fd->n1 = get_formal_tmp_var (fd->n1, &list); fd->n2 = fold_convert (type, fd->n2); if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &fd->pre); + fd->n2 = get_formal_tmp_var (fd->n2, &list); fd->step = fold_convert (type, fd->step); if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &fd->pre); + fd->step = get_formal_tmp_var (fd->step, &list); t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); t = fold_build2 (PLUS_EXPR, type, fd->step, t); @@ -2206,63 +2650,107 @@ expand_omp_for_static_nochunk (struct expand_omp_for_data *fd) if (is_gimple_val (t)) n = t; else - n = get_formal_tmp_var (t, &fd->pre); + n = get_formal_tmp_var (t, &list); t = build2 (TRUNC_DIV_EXPR, utype, n, nthreads); - q = get_formal_tmp_var (t, &fd->pre); + q = get_formal_tmp_var (t, &list); t = build2 (MULT_EXPR, utype, q, nthreads); t = build2 (NE_EXPR, utype, t, n); t = build2 (PLUS_EXPR, utype, q, t); - q = get_formal_tmp_var (t, &fd->pre); + q = get_formal_tmp_var (t, &list); t = build2 (MULT_EXPR, utype, q, threadid); - s0 = get_formal_tmp_var (t, &fd->pre); + s0 = get_formal_tmp_var (t, &list); t = build2 (PLUS_EXPR, utype, s0, q); t = build2 (MIN_EXPR, utype, t, n); - e0 = get_formal_tmp_var (t, &fd->pre); + e0 = get_formal_tmp_var (t, &list); t = build2 (GE_EXPR, boolean_type_node, s0, e0); - t = build3 (COND_EXPR, void_type_node, t, - build_and_jump (&l2), build_and_jump (&l0)); - gimplify_and_add (t, &fd->pre); + t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2), + build_and_jump (&l0)); + append_to_statement_list (t, &list); + + si = bsi_last (entry_bb); + gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR); + bsi_remove (&si, true); + si = bsi_last (entry_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Setup code for sequential iteration goes in SEQ_START_BB. 
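   As a worked example, with n = 10 iterations and nthreads = 4 the
   partitioning above computes q = 10/4 = 2, notices 2*4 != 10 and
   bumps q to 3, so threads 0..3 receive [s0,e0) = [0,3), [3,6),
   [6,9) and [9,10); the code emitted here then maps each range back
   into the original iteration space via V = s0 * STEP + N1.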
*/ + list = alloc_stmt_list (); t = build1 (LABEL_EXPR, void_type_node, l0); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = fold_convert (type, s0); t = build2 (MULT_EXPR, type, t, fd->step); t = build2 (PLUS_EXPR, type, t, fd->n1); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = fold_convert (type, e0); t = build2 (MULT_EXPR, type, t, fd->step); t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &fd->pre); + e = get_formal_tmp_var (t, &list); - t = build1 (LABEL_EXPR, void_type_node, l1); - gimplify_and_add (t, &fd->pre); + seq_start_bb = create_empty_bb (entry_bb); + si = bsi_start (seq_start_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); - append_to_statement_list (OMP_FOR_BODY (fd->for_stmt), &fd->pre); + /* Original body goes in BODY_BB. */ + si = bsi_start (body_bb); + t = build1 (LABEL_EXPR, void_type_node, l1); + bsi_insert_before (&si, t, BSI_CONTINUE_LINKING); + + /* Split EXIT_BB at the OMP_RETURN. The code controlling the + sequential loop goes in the original EXIT_BB. The exit out of + the parallel loop goes in the new block (NEW_EXIT_BB). */ + si = bsi_last (exit_bb); + t = bsi_stmt (si); + bsi_remove (&si, true); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + exit_edge = split_block (exit_bb, t); + new_exit_bb = exit_edge->dest; + list = alloc_stmt_list (); t = build2 (PLUS_EXPR, type, fd->v, fd->step); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = build2 (fd->cond_code, boolean_type_node, fd->v, e); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), NULL); - gimplify_and_add (t, &fd->pre); + t = get_formal_tmp_var (t, &list); + t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), + build_and_jump (&l2)); + append_to_statement_list (t, &list); - expand_omp_for_lastprivate (fd); - + si = bsi_last (exit_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Add the exit label to NEW_EXIT_BB. */ + si = bsi_start (new_exit_bb); t = build1 (LABEL_EXPR, void_type_node, l2); - gimplify_and_add (t, &fd->pre); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + single_succ_edge (new_exit_bb)->flags = EDGE_FALLTHRU; + + /* Connect all the blocks. */ + make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); + + remove_edge (single_succ_edge (entry_bb)); + make_edge (entry_bb, new_exit_bb, EDGE_TRUE_VALUE); + make_edge (entry_bb, seq_start_bb, EDGE_FALSE_VALUE); + + make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE); + find_edge (exit_bb, new_exit_bb)->flags = EDGE_FALSE_VALUE; + + return new_exit_bb; } -/* A subroutine of expand_omp_for_1. Generate code for a parallel - loop with static schedule and a specified chunk size. Given parameters: + +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with static schedule and a specified chunk size. 
Given + parameters: for (V = N1; V cond N2; V += STEP) BODY; @@ -2289,53 +2777,62 @@ expand_omp_for_static_nochunk (struct expand_omp_for_data *fd) trip += 1; goto L0; L4: - if (trip == 0) goto L5; - lastprivate; - L5: */ -static void -expand_omp_for_static_chunk (struct expand_omp_for_data *fd) +static basic_block +expand_omp_for_static_chunk (struct omp_region *region, struct omp_for_data *fd) { - tree l0, l1, l2, l3, l4, l5, n, s0, e0, e, t; + tree l0, l1, l2, l3, l4, n, s0, e0, e, t; tree trip, nthreads, threadid; tree type, utype; + basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; + basic_block trip_update_bb, new_exit_bb; + edge exit_edge; + tree list; + block_stmt_iterator si; l0 = create_artificial_label (); l1 = create_artificial_label (); l2 = create_artificial_label (); l3 = create_artificial_label (); l4 = create_artificial_label (); - l5 = create_artificial_label (); type = TREE_TYPE (fd->v); utype = lang_hooks.types.unsigned_type (type); + entry_bb = bb_for_stmt (region->entry); + body_bb = single_succ (entry_bb); + + exit_bb = bb_for_stmt (region->exit); + + /* Trip and adjustment setup goes in ENTRY_BB. */ + list = alloc_stmt_list (); + t = built_in_decls[BUILT_IN_OMP_GET_NUM_THREADS]; t = build_function_call_expr (t, NULL); t = fold_convert (utype, t); - nthreads = get_formal_tmp_var (t, &fd->pre); + nthreads = get_formal_tmp_var (t, &list); t = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; t = build_function_call_expr (t, NULL); t = fold_convert (utype, t); - threadid = get_formal_tmp_var (t, &fd->pre); + threadid = get_formal_tmp_var (t, &list); fd->n1 = fold_convert (type, fd->n1); if (!is_gimple_val (fd->n1)) - fd->n1 = get_formal_tmp_var (fd->n1, &fd->pre); + fd->n1 = get_formal_tmp_var (fd->n1, &list); fd->n2 = fold_convert (type, fd->n2); if (!is_gimple_val (fd->n2)) - fd->n2 = get_formal_tmp_var (fd->n2, &fd->pre); + fd->n2 = get_formal_tmp_var (fd->n2, &list); fd->step = fold_convert (type, fd->step); if (!is_gimple_val (fd->step)) - fd->step = get_formal_tmp_var (fd->step, &fd->pre); + fd->step = get_formal_tmp_var (fd->step, &list); fd->chunk_size = fold_convert (utype, fd->chunk_size); if (!is_gimple_val (fd->chunk_size)) - fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &fd->pre); + fd->chunk_size = get_formal_tmp_var (fd->chunk_size, &list); t = build_int_cst (type, (fd->cond_code == LT_EXPR ? -1 : 1)); t = fold_build2 (PLUS_EXPR, type, fd->step, t); @@ -2346,155 +2843,164 @@ expand_omp_for_static_chunk (struct expand_omp_for_data *fd) if (is_gimple_val (t)) n = t; else - n = get_formal_tmp_var (t, &fd->pre); + n = get_formal_tmp_var (t, &list); t = build_int_cst (utype, 0); - trip = get_initialized_tmp_var (t, &fd->pre, NULL); + trip = get_initialized_tmp_var (t, &list, NULL); + + si = bsi_last (entry_bb); + gcc_assert (bsi_stmt (si) && TREE_CODE (bsi_stmt (si)) == OMP_FOR); + bsi_remove (&si, true); + si = bsi_last (entry_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Iteration space partitioning goes in ITER_PART_BB. 
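   For instance (numbers are illustrative), with schedule (static, 2)
   and nthreads = 3, thread 1 computes s0 = (trip * 3 + 1) * 2 here,
   i.e. it visits the chunks [2,4), [8,10), [14,16), ... as TRIP is
   incremented below.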
*/ + list = alloc_stmt_list (); t = build1 (LABEL_EXPR, void_type_node, l0); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = build2 (MULT_EXPR, utype, trip, nthreads); t = build2 (PLUS_EXPR, utype, t, threadid); t = build2 (MULT_EXPR, utype, t, fd->chunk_size); - s0 = get_formal_tmp_var (t, &fd->pre); + s0 = get_formal_tmp_var (t, &list); t = build2 (PLUS_EXPR, utype, s0, fd->chunk_size); t = build2 (MIN_EXPR, utype, t, n); - e0 = get_formal_tmp_var (t, &fd->pre); + e0 = get_formal_tmp_var (t, &list); t = build2 (LT_EXPR, boolean_type_node, s0, n); t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l1), build_and_jump (&l4)); - gimplify_and_add (t, &fd->pre); + append_to_statement_list (t, &list); + + iter_part_bb = create_empty_bb (entry_bb); + si = bsi_start (iter_part_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Setup code for sequential iteration goes in SEQ_START_BB. */ + list = alloc_stmt_list (); t = build1 (LABEL_EXPR, void_type_node, l1); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = fold_convert (type, s0); t = build2 (MULT_EXPR, type, t, fd->step); t = build2 (PLUS_EXPR, type, t, fd->n1); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = fold_convert (type, e0); t = build2 (MULT_EXPR, type, t, fd->step); t = build2 (PLUS_EXPR, type, t, fd->n1); - e = get_formal_tmp_var (t, &fd->pre); + e = get_formal_tmp_var (t, &list); + + seq_start_bb = create_empty_bb (iter_part_bb); + si = bsi_start (seq_start_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + /* Main loop body goes in BODY_BB. */ + si = bsi_start (body_bb); t = build1 (LABEL_EXPR, void_type_node, l2); - gimplify_and_add (t, &fd->pre); + bsi_insert_before (&si, t, BSI_CONTINUE_LINKING); - append_to_statement_list (OMP_FOR_BODY (fd->for_stmt), &fd->pre); + /* Split EXIT_BB. The code controlling the sequential loop goes in + the first half. The trip update code goes into the second half + (TRIP_UPDATE_BB). */ + list = alloc_stmt_list (); t = build2 (PLUS_EXPR, type, fd->v, fd->step); t = build2 (MODIFY_EXPR, void_type_node, fd->v, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = build2 (fd->cond_code, boolean_type_node, fd->v, e); + t = get_formal_tmp_var (t, &list); t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l2), build_and_jump (&l3)); - gimplify_and_add (t, &fd->pre); + append_to_statement_list (t, &list); + + si = bsi_last (exit_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_remove (&si, true); + exit_edge = split_block (exit_bb, t); + si = bsi_last (exit_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + + /* Trip update code goes into TRIP_UPDATE_BB. */ + trip_update_bb = exit_edge->dest; + list = alloc_stmt_list (); t = build1 (LABEL_EXPR, void_type_node, l3); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); t = build_int_cst (utype, 1); t = build2 (PLUS_EXPR, utype, trip, t); t = build2 (MODIFY_EXPR, void_type_node, trip, t); - gimplify_and_add (t, &fd->pre); + gimplify_and_add (t, &list); - t = build1 (GOTO_EXPR, void_type_node, l0); - gimplify_and_add (t, &fd->pre); + si = bsi_start (trip_update_bb); + bsi_insert_after (&si, list, BSI_CONTINUE_LINKING); + exit_edge = single_succ_edge (trip_update_bb); + exit_edge->flags = EDGE_FALLTHRU; + new_exit_bb = exit_edge->dest; + /* Insert exit label on EXIT_EDGE. 
*/ t = build1 (LABEL_EXPR, void_type_node, l4); - gimplify_and_add (t, &fd->pre); + bsi_insert_on_edge_immediate (exit_edge, t); - t = build_int_cst (utype, 0); - t = build2 (EQ_EXPR, boolean_type_node, trip, t); - t = build3 (COND_EXPR, void_type_node, t, build_and_jump (&l5), NULL); + /* Connect the new blocks. */ + remove_edge (single_succ_edge (entry_bb)); + make_edge (entry_bb, iter_part_bb, EDGE_FALLTHRU); - expand_omp_for_lastprivate (fd); - - t = build1 (LABEL_EXPR, void_type_node, l5); - gimplify_and_add (t, &fd->pre); + make_edge (iter_part_bb, seq_start_bb, EDGE_TRUE_VALUE); + make_edge (iter_part_bb, new_exit_bb, EDGE_FALSE_VALUE); + remove_edge (exit_edge); + + make_edge (seq_start_bb, body_bb, EDGE_FALLTHRU); + + make_edge (exit_bb, body_bb, EDGE_TRUE_VALUE); + find_edge (exit_bb, trip_update_bb)->flags = EDGE_FALSE_VALUE; + + make_edge (trip_update_bb, iter_part_bb, EDGE_FALLTHRU); + + return new_exit_bb; } -/* A subroutine of expand_omp_for. Expand the logic of the loop itself. */ -static tree -expand_omp_for_1 (tree *stmt_p, omp_context *ctx) -{ - struct expand_omp_for_data fd; - tree dlist; +/* Expand the OpenMP loop defined by REGION. */ - extract_omp_for_data (*stmt_p, ctx, &fd); +static void +expand_omp_for (struct omp_region *region) +{ + struct omp_for_data fd; + basic_block last_bb = NULL; - expand_rec_input_clauses (OMP_FOR_CLAUSES (fd.for_stmt), - &fd.pre, &dlist, ctx); + push_gimplify_context (); - expand_omp (&OMP_FOR_PRE_BODY (fd.for_stmt), ctx); - append_to_statement_list (OMP_FOR_PRE_BODY (fd.for_stmt), &fd.pre); + extract_omp_for_data (region->entry, &fd); if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC && !fd.have_ordered) { if (fd.chunk_size == NULL) - expand_omp_for_static_nochunk (&fd); + last_bb = expand_omp_for_static_nochunk (region, &fd); else - expand_omp_for_static_chunk (&fd); + last_bb = expand_omp_for_static_chunk (region, &fd); } else { - int fn_index; - - fn_index = fd.sched_kind + fd.have_ordered * 4; - - expand_omp_for_generic (&fd, BUILT_IN_GOMP_LOOP_STATIC_START + fn_index, - BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index); + int fn_index = fd.sched_kind + fd.have_ordered * 4; + int start_ix = BUILT_IN_GOMP_LOOP_STATIC_START + fn_index; + int next_ix = BUILT_IN_GOMP_LOOP_STATIC_NEXT + fn_index; + last_bb = expand_omp_for_generic (region, &fd, start_ix, next_ix); } - expand_reduction_clauses (OMP_FOR_CLAUSES (fd.for_stmt), &fd.pre, ctx); - append_to_statement_list (dlist, &fd.pre); - - /* If this parallel loop was part of a combined parallel loop - directive, inform the parent parallel what flavour of - GOMP_parallel_loop_XXX_start to use. */ - if (is_in_combined_parallel_ctx (ctx)) - { - int start_ix = BUILT_IN_GOMP_PARALLEL_LOOP_STATIC_START + fd.sched_kind; - ctx->outer->parallel_start_ix = start_ix; - } - else if (!fd.have_nowait) - build_omp_barrier (&fd.pre); - - return fd.pre; + pop_gimplify_context (NULL); } -/* Expand code for an OpenMP loop directive. 
*/ - -static void -expand_omp_for (tree *stmt_p, omp_context *ctx) -{ - tree bind, block, stmt_list; - - push_gimplify_context (); - - expand_omp (&OMP_FOR_BODY (*stmt_p), ctx); - - stmt_list = expand_omp_for_1 (stmt_p, ctx); - block = make_node (BLOCK); - bind = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, block); - maybe_catch_exception (&BIND_EXPR_BODY (bind)); - *stmt_p = bind; - - pop_gimplify_context (bind); - BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); - BLOCK_VARS (block) = BIND_EXPR_VARS (bind); -} /* Expand code for an OpenMP sections directive. In pseudo code, we generate - firstprivate; v = GOMP_sections_start (n); L0: switch (v) @@ -2508,7 +3014,6 @@ expand_omp_for (tree *stmt_p, omp_context *ctx) ... case n: ... - lastprivate; default: abort (); } @@ -2518,156 +3023,468 @@ expand_omp_for (tree *stmt_p, omp_context *ctx) L2: reduction; - If this is a combined parallel sections skip the call to - GOMP_sections_start and emit the call to GOMP_sections_next right - before the switch(). */ + If this is a combined parallel sections, replace the call to + GOMP_sections_start with 'goto L1'. */ static void -expand_omp_sections (tree *stmt_p, omp_context *ctx) +expand_omp_sections (struct omp_region *region) { - tree sec_stmt, label_vec, bind, block, stmt_list, l0, l1, l2, t, u, v; - tree_stmt_iterator tsi; - tree dlist; + tree label_vec, l0, l1, l2, t, u, v; unsigned i, len; - bool in_combined_parallel = is_in_combined_parallel_ctx (ctx); - - sec_stmt = *stmt_p; - stmt_list = NULL; - - push_gimplify_context (); + basic_block entry_bb, exit_bb, l0_bb, l1_bb, default_bb; + edge e, entry_edge, exit_edge; + edge_iterator ei; + block_stmt_iterator si; - expand_rec_input_clauses (OMP_SECTIONS_CLAUSES (sec_stmt), - &stmt_list, &dlist, ctx); - - tsi = tsi_start (OMP_SECTIONS_BODY (sec_stmt)); - for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi)) - continue; + entry_bb = bb_for_stmt (region->entry); + exit_bb = bb_for_stmt (region->exit); l0 = create_artificial_label (); l1 = create_artificial_label (); l2 = create_artificial_label (); + v = create_tmp_var (unsigned_type_node, ".section"); + + /* We will build a switch() with enough cases for all the + OMP_SECTION regions, a '0' case to handle the end of more work + and a default case to abort if something goes wrong. */ + len = EDGE_COUNT (entry_bb->succs); label_vec = make_tree_vec (len + 2); - t = build_int_cst (unsigned_type_node, len); - t = tree_cons (NULL, t, NULL); + /* Split ENTRY_BB. The call to GOMP_sections_start goes in the + first half. The second half contains the switch(). */ + si = bsi_last (entry_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_SECTIONS); + bsi_remove (&si, true); + entry_edge = split_block (entry_bb, t); + l0_bb = entry_edge->dest; - if (in_combined_parallel) - { - /* Nothing to do. Just inform our parent of the additional - arguments to invoke GOMP_parallel_sections_start. */ - ctx->outer->parallel_start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS_START; - ctx->outer->parallel_start_additional_args = t; - } - else + if (!is_combined_parallel (region)) { + /* If we are not inside a combined parallel+sections region, + call GOMP_sections_start. 
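   For a two-section construct (user code shown for illustration)

     #pragma omp sections
     {
       #pragma omp section
         a ();
       #pragma omp section
         b ();
     }

   the dispatcher built below is roughly

     v = GOMP_sections_start (2);
   L0:
     switch (v)
       {
       case 0: goto L2;
       case 1: a (); goto L1;
       case 2: b (); goto L1;
       default: __builtin_trap ();
       }
   L1:
     v = GOMP_sections_next ();
     goto L0;
   L2: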
*/ + t = build_int_cst (unsigned_type_node, len); + t = tree_cons (NULL, t, NULL); u = built_in_decls[BUILT_IN_GOMP_SECTIONS_START]; t = build_function_call_expr (u, t); t = build2 (MODIFY_EXPR, void_type_node, v, t); - gimplify_and_add (t, &stmt_list); + si = bsi_last (entry_bb); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); } - t = build1 (LABEL_EXPR, void_type_node, l0); - gimplify_and_add (t, &stmt_list); + /* The switch() statement replacing OMP_SECTIONS goes in L0_BB. */ + si = bsi_last (l0_bb); - if (in_combined_parallel) - { - /* Combined parallel sections need the call to GOMP_sections_next - before the switch(). */ - t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; - t = build_function_call_expr (t, NULL); - t = build2 (MODIFY_EXPR, void_type_node, v, t); - gimplify_and_add (t, &stmt_list); - } + t = build1 (LABEL_EXPR, void_type_node, l0); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); t = build3 (SWITCH_EXPR, void_type_node, v, NULL, label_vec); - gimplify_and_add (t, &stmt_list); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); t = build3 (CASE_LABEL_EXPR, void_type_node, build_int_cst (unsigned_type_node, 0), NULL, l2); TREE_VEC_ELT (label_vec, 0) = t; - tsi = tsi_start (OMP_SECTIONS_BODY (sec_stmt)); - for (i = 0; i < len; i++, tsi_next (&tsi)) + /* Convert each OMP_SECTION into a CASE_LABEL_EXPR. */ + i = 1; + FOR_EACH_EDGE (e, ei, l0_bb->succs) { - omp_context *sctx; + basic_block s_entry_bb, s_exit_bb; + + e->flags = 0; + s_entry_bb = e->dest; + si = bsi_last (s_entry_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_SECTION); + s_exit_bb = bb_for_stmt (lookup_omp_region (t)->exit); + bsi_remove (&si, true); t = create_artificial_label (); - u = build_int_cst (unsigned_type_node, i + 1); + u = build_int_cst (unsigned_type_node, i); u = build3 (CASE_LABEL_EXPR, void_type_node, u, NULL, t); - TREE_VEC_ELT (label_vec, i + 1) = u; + TREE_VEC_ELT (label_vec, i) = u; t = build1 (LABEL_EXPR, void_type_node, t); - gimplify_and_add (t, &stmt_list); - - t = tsi_stmt (tsi); - sctx = maybe_lookup_ctx (t); - gcc_assert (sctx); - expand_omp (&OMP_SECTION_BODY (t), sctx); - append_to_statement_list (OMP_SECTION_BODY (t), &stmt_list); - - if (i == len - 1) - expand_lastprivate_clauses (OMP_SECTIONS_CLAUSES (sec_stmt), - NULL, &stmt_list, ctx); - - t = build1 (GOTO_EXPR, void_type_node, l1); - gimplify_and_add (t, &stmt_list); + si = bsi_last (s_entry_bb); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + i++; + single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; + + si = bsi_last (s_exit_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_remove (&si, true); + single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; } + /* Error handling code goes in DEFAULT_BB. */ + default_bb = create_empty_bb (entry_bb); + si = bsi_start (default_bb); t = create_artificial_label (); u = build3 (CASE_LABEL_EXPR, void_type_node, NULL, NULL, t); TREE_VEC_ELT (label_vec, len + 1) = u; t = build1 (LABEL_EXPR, void_type_node, t); - gimplify_and_add (t, &stmt_list); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); t = built_in_decls[BUILT_IN_TRAP]; t = build_function_call_expr (t, NULL); - gimplify_and_add (t, &stmt_list); - + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + make_edge (l0_bb, default_bb, 0); + + /* Code to get the next section goes in L1_BB. 
*/ + si = bsi_last (exit_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_remove (&si, true); + exit_edge = split_block (exit_bb, t); + l1_bb = exit_edge->src; + exit_bb = exit_edge->dest; + si = bsi_start (l1_bb); t = build1 (LABEL_EXPR, void_type_node, l1); - gimplify_and_add (t, &stmt_list); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); - if (!in_combined_parallel) + t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; + t = build_function_call_expr (t, NULL); + t = build2 (MODIFY_EXPR, void_type_node, v, t); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + remove_edge (single_succ_edge (l1_bb)); + make_edge (l1_bb, l0_bb, EDGE_FALLTHRU); + + /* Exit label in EXIT_BB. */ + si = bsi_last (exit_bb); + t = build1 (LABEL_EXPR, void_type_node, l2); + bsi_insert_after (&si, t, BSI_CONTINUE_LINKING); + + make_edge (l0_bb, exit_bb, 0); + single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; + + if (is_combined_parallel (region)) { - t = built_in_decls[BUILT_IN_GOMP_SECTIONS_NEXT]; - t = build_function_call_expr (t, NULL); - t = build2 (MODIFY_EXPR, void_type_node, v, t); - gimplify_and_add (t, &stmt_list); + /* If this was a combined parallel+sections region, we did not + emit a GOMP_sections_start in the entry block, so we just + need to jump to L1_BB to get the next section. */ + remove_edge (single_succ_edge (entry_bb)); + make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); } +} - t = build1 (GOTO_EXPR, void_type_node, l0); - gimplify_and_add (t, &stmt_list); - t = build1 (LABEL_EXPR, void_type_node, l2); - gimplify_and_add (t, &stmt_list); +/* Generic expansion for OpenMP synchronization directives: single, + master, ordered and critical. All we need to do here is remove the + entry and exit markers for REGION. */ - expand_reduction_clauses (OMP_SECTIONS_CLAUSES (sec_stmt), &stmt_list, ctx); - append_to_statement_list (dlist, &stmt_list); +static void +expand_omp_synch (struct omp_region *region) +{ + basic_block entry_bb, exit_bb; + block_stmt_iterator si; + tree t; - /* Unless there's a nowait clause, add a barrier afterward. */ - if (!find_omp_clause (OMP_SECTIONS_CLAUSES (sec_stmt), OMP_CLAUSE_NOWAIT)) - build_omp_barrier (&stmt_list); + entry_bb = bb_for_stmt (region->entry); + exit_bb = bb_for_stmt (region->exit); + + si = bsi_last (entry_bb); + t = bsi_stmt (si); + gcc_assert (t + && (TREE_CODE (t) == OMP_SINGLE + || TREE_CODE (t) == OMP_MASTER + || TREE_CODE (t) == OMP_ORDERED + || TREE_CODE (t) == OMP_CRITICAL)); + bsi_remove (&si, true); + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + si = bsi_last (exit_bb); + t = bsi_stmt (si); + gcc_assert (t && TREE_CODE (t) == OMP_RETURN_EXPR); + bsi_remove (&si, true); + single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; +} + + +/* Expand the parallel region tree rooted at REGION. Expansion + proceeds in depth-first order. Innermost regions are expanded + first. This way, parallel regions that require a new function to + be created (e.g., OMP_PARALLEL) can be expanded without having any + internal dependencies in their body. 
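   For instance, given

     #pragma omp parallel
       #pragma omp for schedule (static)
         for (...) ...

   the region tree is a parallel region with one inner loop region;
   the loop is expanded into plain CFG code first, so when
   expand_omp_parallel later outlines the sub-graph into a new
   function, that function no longer contains any OMP directives.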
*/ + +static void +expand_omp (struct omp_region *region) +{ + while (region) + { + enum tree_code code = TREE_CODE (region->entry); + + if (region->inner) + expand_omp (region->inner); + + switch (code) + { + case OMP_PARALLEL: + expand_omp_parallel (region); + break; + + case OMP_FOR: + expand_omp_for (region); + break; + + case OMP_SECTIONS: + expand_omp_sections (region); + break; + + case OMP_SECTION: + /* Individual omp sections are handled together with their + parent OMP_SECTIONS region. */ + break; + + case OMP_SINGLE: + case OMP_MASTER: + case OMP_ORDERED: + case OMP_CRITICAL: + expand_omp_synch (region); + break; + + default: + gcc_unreachable (); + } + + region = region->next; + } +} + + +/* Helper for build_omp_regions. Scan the dominator tree starting at + block BB. PARENT is the region that contains BB. */ + +static void +build_omp_regions_1 (basic_block bb, struct omp_region *parent) +{ + block_stmt_iterator si; + tree stmt; + basic_block son; + + si = bsi_last (bb); + if (!bsi_end_p (si) && OMP_DIRECTIVE_P (bsi_stmt (si))) + { + struct omp_region *region; + + stmt = bsi_stmt (si); + + if (TREE_CODE (stmt) == OMP_RETURN_EXPR) + { + /* STMT is the return point out of region PARENT. Mark it + as the exit point and make PARENT the immediately + enclosing region. */ + gcc_assert (parent); + region = parent; + region->exit = stmt; + parent = parent->outer; + + /* If REGION is a parallel region, determine whether it is + a combined parallel+workshare region. */ + if (TREE_CODE (region->entry) == OMP_PARALLEL) + determine_parallel_type (region); + } + else + { + /* Otherwise, this directive becomes the parent for a new + region. */ + region = new_omp_region (stmt, parent); + parent = region; + } + + gcc_assert (region); + if (omp_regions == NULL) + { + omp_regions = splay_tree_new (splay_tree_compare_pointers, 0, 0); + root_omp_region = region; + } + + splay_tree_insert (omp_regions, (splay_tree_key) stmt, + (splay_tree_value) region); + } + + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + build_omp_regions_1 (son, parent); +} + + +/* Scan the CFG and build a tree of OMP regions. Return the root of + the OMP region tree. */ + +static void +build_omp_regions (void) +{ + gcc_assert (omp_regions == NULL && root_omp_region == NULL); + calculate_dominance_info (CDI_DOMINATORS); + build_omp_regions_1 (ENTRY_BLOCK_PTR, NULL); +} + + +/* Main entry point for expanding OMP-GIMPLE into runtime calls. */ + +static void +execute_expand_omp (void) +{ + build_omp_regions (); + + if (root_omp_region) + { + if (dump_file) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + expand_omp (root_omp_region); + splay_tree_delete (omp_regions); + root_omp_region = NULL; + omp_regions = NULL; + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + } + + /* Expansion adds basic blocks that may be merged. 
*/ + cleanup_tree_cfg (); +} + +static bool +gate_expand_omp (void) +{ + return flag_openmp != 0 && errorcount == 0; +} + +struct tree_opt_pass pass_expand_omp = +{ + "ompexp", /* name */ + gate_expand_omp, /* gate */ + execute_expand_omp, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + 0, /* tv_id */ + PROP_gimple_any, /* properties_required */ + PROP_gimple_lomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_func, /* todo_flags_finish */ + 0 /* letter */ +}; + +/* Routines to lower OpenMP directives into OMP-GIMPLE. */ + +/* Lower the OpenMP sections directive in *STMT_P. */ + +static void +lower_omp_sections (tree *stmt_p, omp_context *ctx) +{ + tree new_stmt, stmt, body, bind, block, ilist, olist, new_body; + tree dlist, region_exit; + tree_stmt_iterator tsi; + unsigned i, len; + + stmt = *stmt_p; + + gcc_assert (OMP_SECTIONS_SECTIONS (stmt) == NULL_TREE); + + push_gimplify_context (); + + dlist = NULL; + ilist = NULL; + lower_rec_input_clauses (OMP_SECTIONS_CLAUSES (stmt), &ilist, &dlist, ctx); + + tsi = tsi_start (OMP_SECTIONS_BODY (stmt)); + for (len = 0; !tsi_end_p (tsi); len++, tsi_next (&tsi)) + continue; + + /* There are two markers per section and one end marker for the + whole construct. */ + OMP_SECTIONS_SECTIONS (stmt) = make_tree_vec (2 * len + 1); + + tsi = tsi_start (OMP_SECTIONS_BODY (stmt)); + body = alloc_stmt_list (); + for (i = 0; i < len; i++, tsi_next (&tsi)) + { + omp_context *sctx; + tree sec_start, sec_end, sec_body; + + sec_start = tsi_stmt (tsi); + sec_body = alloc_stmt_list (); + sctx = maybe_lookup_ctx (sec_start); + gcc_assert (sctx); + + lower_omp (&OMP_SECTION_BODY (sec_start), sctx); + append_to_statement_list (OMP_SECTION_BODY (sec_start), &sec_body); + + if (i == len - 1) + { + tree l = alloc_stmt_list (); + lower_lastprivate_clauses (OMP_SECTIONS_CLAUSES (stmt), NULL, + &l, ctx); + append_to_statement_list (l, &sec_body); + } + + sec_end = make_node (OMP_RETURN_EXPR); + + OMP_SECTION_BODY (sec_start) = sec_body; + append_to_statement_list (sec_start, &body); + append_to_statement_list (sec_end, &body); + + TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2) = sec_start; + TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), i * 2 + 1) = sec_end; + } block = make_node (BLOCK); - bind = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, block); + bind = build3 (BIND_EXPR, void_type_node, NULL, body, block); maybe_catch_exception (&BIND_EXPR_BODY (bind)); - *stmt_p = bind; - pop_gimplify_context (bind); - BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); - BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + olist = NULL_TREE; + lower_reduction_clauses (OMP_SECTIONS_CLAUSES (stmt), &olist, ctx); + + /* Unless there's a nowait clause, add a barrier afterward. 
*/ + if (!find_omp_clause (OMP_SECTIONS_CLAUSES (stmt), OMP_CLAUSE_NOWAIT)) + build_omp_barrier (&olist); + + pop_gimplify_context (NULL_TREE); + record_vars_into (ctx->block_vars, ctx->cb.dst_fn); + + new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + TREE_SIDE_EFFECTS (new_stmt) = 1; + OMP_SECTIONS_BODY (stmt) = body; + + region_exit = make_node (OMP_RETURN_EXPR); + + new_body = alloc_stmt_list (); + append_to_statement_list (ilist, &new_body); + append_to_statement_list (stmt, &new_body); + append_to_statement_list (region_exit, &new_body); + append_to_statement_list (olist, &new_body); + append_to_statement_list (dlist, &new_body); + BIND_EXPR_BODY (new_stmt) = new_body; + + TREE_VEC_ELT (OMP_SECTIONS_SECTIONS (stmt), 2 * len) = region_exit; + + *stmt_p = new_stmt; } -/* A subroutine of expand_omp_single. Expand the simple form of +/* A subroutine of lower_omp_single. Expand the simple form of an OMP_SINGLE, without a copyprivate clause: if (GOMP_single_start ()) BODY; [ GOMP_barrier (); ] -> unless 'nowait' is present. -*/ + + FIXME. It may be better to delay expanding the logic of this until + pass_expand_omp. The expanded logic may make the job more difficult + for a synchronization analysis pass. */ static void -expand_omp_single_simple (tree single_stmt, tree *pre_p) +lower_omp_single_simple (tree single_stmt, tree *pre_p) { tree t; @@ -2681,7 +3498,8 @@ expand_omp_single_simple (tree single_stmt, tree *pre_p) build_omp_barrier (pre_p); } -/* A subroutine of expand_omp_single. Expand the simple form of + +/* A subroutine of lower_omp_single. Expand the simple form of an OMP_SINGLE, with a copyprivate clause: #pragma omp single copyprivate (a, b, c) @@ -2705,10 +3523,13 @@ expand_omp_single_simple (tree single_stmt, tree *pre_p) } GOMP_barrier (); } -*/ + + FIXME. It may be better to delay expanding the logic of this until + pass_expand_omp. The expanded logic may make the job more difficult + for a synchronization analysis pass. */ static void -expand_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx) +lower_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx) { tree ptr_type, t, args, l0, l1, l2, copyin_seq; @@ -2739,7 +3560,7 @@ expand_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx) append_to_statement_list (OMP_SINGLE_BODY (single_stmt), pre_p); copyin_seq = NULL; - expand_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p, + lower_copyprivate_clauses (OMP_SINGLE_CLAUSES (single_stmt), pre_p, &copyin_seq, ctx); t = build_fold_addr_expr (ctx->sender_decl); @@ -2762,41 +3583,47 @@ expand_omp_single_copy (tree single_stmt, tree *pre_p, omp_context *ctx) build_omp_barrier (pre_p); } + /* Expand code for an OpenMP single directive.
*/ static void -expand_omp_single (tree *stmt_p, omp_context *ctx) +lower_omp_single (tree *stmt_p, omp_context *ctx) { - tree bind, block, single_stmt = *stmt_p, dlist; + tree t, bind, block, single_stmt = *stmt_p, dlist; push_gimplify_context (); block = make_node (BLOCK); bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); - *stmt_p = bind; + TREE_SIDE_EFFECTS (bind) = 1; - expand_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt), - &BIND_EXPR_BODY (bind), &dlist, ctx); - - expand_omp (&OMP_SINGLE_BODY (single_stmt), ctx); + lower_rec_input_clauses (OMP_SINGLE_CLAUSES (single_stmt), + &BIND_EXPR_BODY (bind), &dlist, ctx); + lower_omp (&OMP_SINGLE_BODY (single_stmt), ctx); if (ctx->record_type) - expand_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx); + lower_omp_single_copy (single_stmt, &BIND_EXPR_BODY (bind), ctx); else - expand_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind)); + lower_omp_single_simple (single_stmt, &BIND_EXPR_BODY (bind)); append_to_statement_list (dlist, &BIND_EXPR_BODY (bind)); - + t = make_node (OMP_RETURN_EXPR); + append_to_statement_list (t, &BIND_EXPR_BODY (bind)); maybe_catch_exception (&BIND_EXPR_BODY (bind)); pop_gimplify_context (bind); + BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + + OMP_SINGLE_BODY (single_stmt) = alloc_stmt_list (); + append_to_statement_list (bind, &OMP_SINGLE_BODY (single_stmt)); } + /* Expand code for an OpenMP master directive. */ static void -expand_omp_master (tree *stmt_p, omp_context *ctx) +lower_omp_master (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p, lab = NULL, x; @@ -2804,7 +3631,7 @@ expand_omp_master (tree *stmt_p, omp_context *ctx) block = make_node (BLOCK); bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); - *stmt_p = bind; + TREE_SIDE_EFFECTS (bind) = 1; x = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; x = build_function_call_expr (x, NULL); @@ -2812,22 +3639,28 @@ expand_omp_master (tree *stmt_p, omp_context *ctx) x = build3 (COND_EXPR, void_type_node, x, NULL, build_and_jump (&lab)); gimplify_and_add (x, &BIND_EXPR_BODY (bind)); - expand_omp (&OMP_MASTER_BODY (stmt), ctx); + lower_omp (&OMP_MASTER_BODY (stmt), ctx); append_to_statement_list (OMP_MASTER_BODY (stmt), &BIND_EXPR_BODY (bind)); x = build1 (LABEL_EXPR, void_type_node, lab); gimplify_and_add (x, &BIND_EXPR_BODY (bind)); - + x = make_node (OMP_RETURN_EXPR); + append_to_statement_list (x, &BIND_EXPR_BODY (bind)); maybe_catch_exception (&BIND_EXPR_BODY (bind)); pop_gimplify_context (bind); + BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + + OMP_MASTER_BODY (stmt) = alloc_stmt_list (); + append_to_statement_list (bind, &OMP_MASTER_BODY (stmt)); } + /* Expand code for an OpenMP ordered directive. 
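   The net effect is simply (sketch)

     GOMP_ordered_start ();
     BODY;
     GOMP_ordered_end ();

   plus the trailing OMP_RETURN_EXPR marker consumed later by
   expand_omp_synch.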
*/ static void -expand_omp_ordered (tree *stmt_p, omp_context *ctx) +lower_omp_ordered (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p, x; @@ -2835,26 +3668,30 @@ expand_omp_ordered (tree *stmt_p, omp_context *ctx) block = make_node (BLOCK); bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); - *stmt_p = bind; + TREE_SIDE_EFFECTS (bind) = 1; x = built_in_decls[BUILT_IN_GOMP_ORDERED_START]; x = build_function_call_expr (x, NULL); gimplify_and_add (x, &BIND_EXPR_BODY (bind)); - expand_omp (&OMP_ORDERED_BODY (stmt), ctx); + lower_omp (&OMP_ORDERED_BODY (stmt), ctx); append_to_statement_list (OMP_ORDERED_BODY (stmt), &BIND_EXPR_BODY (bind)); x = built_in_decls[BUILT_IN_GOMP_ORDERED_END]; x = build_function_call_expr (x, NULL); gimplify_and_add (x, &BIND_EXPR_BODY (bind)); - + x = make_node (OMP_RETURN_EXPR); + append_to_statement_list (x, &BIND_EXPR_BODY (bind)); maybe_catch_exception (&BIND_EXPR_BODY (bind)); pop_gimplify_context (bind); + BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + + OMP_ORDERED_BODY (stmt) = alloc_stmt_list (); + append_to_statement_list (bind, &OMP_ORDERED_BODY (stmt)); } -/* Expand code for an OpenMP critical directive. */ /* Gimplify an OMP_CRITICAL statement. This is a relatively simple substitution of a couple of function calls. But in the NAMED case, @@ -2865,10 +3702,10 @@ static GTY((param1_is (tree), param2_is (tree))) splay_tree critical_name_mutexes; static void -expand_omp_critical (tree *stmt_p, omp_context *ctx) +lower_omp_critical (tree *stmt_p, omp_context *ctx) { tree bind, block, stmt = *stmt_p; - tree lock, unlock, name; + tree t, lock, unlock, name; name = OMP_CRITICAL_NAME (stmt); if (name) @@ -2924,27 +3761,217 @@ expand_omp_critical (tree *stmt_p, omp_context *ctx) block = make_node (BLOCK); bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, block); - *stmt_p = bind; + TREE_SIDE_EFFECTS (bind) = 1; gimplify_and_add (lock, &BIND_EXPR_BODY (bind)); - expand_omp (&OMP_CRITICAL_BODY (stmt), ctx); + lower_omp (&OMP_CRITICAL_BODY (stmt), ctx); maybe_catch_exception (&OMP_CRITICAL_BODY (stmt)); append_to_statement_list (OMP_CRITICAL_BODY (stmt), &BIND_EXPR_BODY (bind)); gimplify_and_add (unlock, &BIND_EXPR_BODY (bind)); + t = make_node (OMP_RETURN_EXPR); + append_to_statement_list (t, &BIND_EXPR_BODY (bind)); pop_gimplify_context (bind); BIND_EXPR_VARS (bind) = chainon (BIND_EXPR_VARS (bind), ctx->block_vars); BLOCK_VARS (block) = BIND_EXPR_VARS (bind); + + OMP_CRITICAL_BODY (stmt) = alloc_stmt_list (); + append_to_statement_list (bind, &OMP_CRITICAL_BODY (stmt)); +} + + +/* A subroutine of lower_omp_for. Generate code to emit the predicate + for a lastprivate clause. Given a loop control predicate of (V + cond N2), we gate the clause on (!(V cond N2)). The lowered form + is appended to *BODY_P. */ + +static void +lower_omp_for_lastprivate (struct omp_for_data *fd, tree *body_p, + struct omp_context *ctx) +{ + tree clauses, cond; + enum tree_code cond_code; + + cond_code = fd->cond_code; + cond_code = cond_code == LT_EXPR ? GE_EXPR : LE_EXPR; + + /* When possible, use a strict equality expression. This can let VRP + type optimizations deduce the value and remove a copy. 
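   For example, for

     #pragma omp for lastprivate (x)
       for (i = 0; i < n; i++) ...

   the lastprivate code is gated on (i == n) rather than (i >= n),
   letting later passes prove the final value of i.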
*/ + if (host_integerp (fd->step, 0)) + { + HOST_WIDE_INT step = TREE_INT_CST_LOW (fd->step); + if (step == 1 || step == -1) + cond_code = EQ_EXPR; + } + + cond = build2 (cond_code, boolean_type_node, fd->v, fd->n2); + + clauses = OMP_FOR_CLAUSES (fd->for_stmt); + lower_lastprivate_clauses (clauses, cond, body_p, ctx); +} + + +/* Lower code for an OpenMP loop directive. */ + +static void +lower_omp_for (tree *stmt_p, omp_context *ctx) +{ + tree t, stmt, ilist, dlist, new_stmt, *body_p, *rhs_p; + struct omp_for_data fd; + + stmt = *stmt_p; + + push_gimplify_context (); + + lower_omp (&OMP_FOR_PRE_BODY (stmt), ctx); + lower_omp (&OMP_FOR_BODY (stmt), ctx); + + /* Move declaration of temporaries in the loop body before we make + it go away. */ + if (TREE_CODE (OMP_FOR_BODY (stmt)) == BIND_EXPR) + record_vars_into (BIND_EXPR_VARS (OMP_FOR_BODY (stmt)), ctx->cb.dst_fn); + + new_stmt = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + TREE_SIDE_EFFECTS (new_stmt) = 1; + body_p = &BIND_EXPR_BODY (new_stmt); + + /* The pre-body and input clauses go before the lowered OMP_FOR. */ + ilist = NULL; + dlist = NULL; + append_to_statement_list (OMP_FOR_PRE_BODY (stmt), body_p); + lower_rec_input_clauses (OMP_FOR_CLAUSES (stmt), body_p, &dlist, ctx); + + /* Lower the header expressions. At this point, we can assume that + the header is of the form: + + #pragma omp for (V = VAL1; V {<|>|<=|>=} VAL2; V = V [+-] VAL3) + + We just need to make sure that VAL1, VAL2 and VAL3 are lowered + using the .omp_data_s mapping, if needed. */ + rhs_p = &TREE_OPERAND (OMP_FOR_INIT (stmt), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + + rhs_p = &TREE_OPERAND (OMP_FOR_COND (stmt), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + + rhs_p = &TREE_OPERAND (TREE_OPERAND (OMP_FOR_INCR (stmt), 1), 1); + if (!is_gimple_min_invariant (*rhs_p)) + *rhs_p = get_formal_tmp_var (*rhs_p, body_p); + + /* Once lowered, extract the bounds and clauses. */ + extract_omp_for_data (stmt, &fd); + + /* Region exit marker goes at the end of the loop body. */ + t = make_node (OMP_RETURN_EXPR); + append_to_statement_list (t, &OMP_FOR_BODY (stmt)); + maybe_catch_exception (&OMP_FOR_BODY (stmt)); + append_to_statement_list (stmt, body_p); + + /* After the loop, add exit clauses. */ + lower_omp_for_lastprivate (&fd, &dlist, ctx); + lower_reduction_clauses (OMP_FOR_CLAUSES (stmt), body_p, ctx); + append_to_statement_list (dlist, body_p); + + /* Add a barrier unless the user specified NOWAIT. Note that if + this is a combined parallel+loop construct, the barrier will be + optimized away during expansion (see expand_omp_for). */ + if (!fd.have_nowait) + { + tree stmt = alloc_stmt_list (); + build_omp_barrier (&stmt); + append_to_statement_list (stmt, body_p); + } + + pop_gimplify_context (NULL_TREE); + record_vars_into (ctx->block_vars, ctx->cb.dst_fn); + + OMP_FOR_PRE_BODY (stmt) = NULL_TREE; + *stmt_p = new_stmt; +} + + +/* Lower the OpenMP parallel directive in *STMT_P. CTX holds context + information for the directive. 
*/ + +static void +lower_omp_parallel (tree *stmt_p, omp_context *ctx) +{ + tree clauses, par_bind, par_body, new_body, bind; + tree olist, ilist, par_olist, par_ilist; + tree stmt, child_fn, t; + + stmt = *stmt_p; + + clauses = OMP_PARALLEL_CLAUSES (stmt); + par_bind = OMP_PARALLEL_BODY (stmt); + par_body = BIND_EXPR_BODY (par_bind); + child_fn = ctx->cb.dst_fn; + + push_gimplify_context (); + + par_olist = NULL_TREE; + par_ilist = NULL_TREE; + lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx); + lower_omp (&par_body, ctx); + maybe_catch_exception (&par_body); + lower_reduction_clauses (clauses, &par_olist, ctx); + + /* Declare all the variables created by mapping and the variables + declared in the scope of the parallel body. */ + record_vars_into (ctx->block_vars, child_fn); + record_vars_into (BIND_EXPR_VARS (par_bind), child_fn); + + if (ctx->record_type) + { + ctx->sender_decl = create_tmp_var (ctx->record_type, ".omp_data_o"); + OMP_PARALLEL_DATA_ARG (stmt) = ctx->sender_decl; + } + + olist = NULL_TREE; + ilist = NULL_TREE; + lower_send_clauses (clauses, &ilist, &olist, ctx); + lower_send_shared_vars (&ilist, &olist, ctx); + + /* Once all the expansions are done, sequence all the different + fragments inside OMP_PARALLEL_BODY. */ + bind = build3 (BIND_EXPR, void_type_node, NULL, NULL, NULL); + append_to_statement_list (ilist, &BIND_EXPR_BODY (bind)); + + new_body = alloc_stmt_list (); + + if (ctx->record_type) + { + t = build_fold_addr_expr (ctx->sender_decl); + t = build2 (MODIFY_EXPR, void_type_node, ctx->receiver_decl, t); + append_to_statement_list (t, &new_body); + } + + append_to_statement_list (par_ilist, &new_body); + append_to_statement_list (par_body, &new_body); + append_to_statement_list (par_olist, &new_body); + t = make_node (OMP_RETURN_EXPR); + append_to_statement_list (t, &new_body); + OMP_PARALLEL_BODY (stmt) = new_body; + + append_to_statement_list (stmt, &BIND_EXPR_BODY (bind)); + append_to_statement_list (olist, &BIND_EXPR_BODY (bind)); + + *stmt_p = bind; + + pop_gimplify_context (NULL_TREE); } + /* Pass *TP back through the gimplifier within the context determined by WI. This handles replacement of DECL_VALUE_EXPR, as well as adjusting the flags on ADDR_EXPR. */ static void -expand_regimplify (tree *tp, struct walk_stmt_info *wi) +lower_regimplify (tree *tp, struct walk_stmt_info *wi) { enum gimplify_status gs; tree pre = NULL; @@ -2961,66 +3988,77 @@ expand_regimplify (tree *tp, struct walk_stmt_info *wi) tsi_link_before (&wi->tsi, pre, TSI_SAME_STMT); } + +/* Callback for walk_stmts. Lower the OpenMP directive pointed by TP. */ + static tree -expand_omp_1 (tree *tp, int *walk_subtrees, void *data) +lower_omp_1 (tree *tp, int *walk_subtrees, void *data) { struct walk_stmt_info *wi = data; omp_context *ctx = wi->info; tree t = *tp; + /* If we have issued syntax errors, avoid doing any heavy lifting. + Just replace the OpenMP directives with a NOP to avoid + confusing RTL expansion. 
*/ + if (errorcount && OMP_DIRECTIVE_P (*tp)) + { + *tp = build_empty_stmt (); + return NULL_TREE; + } + *walk_subtrees = 0; switch (TREE_CODE (*tp)) { case OMP_PARALLEL: ctx = maybe_lookup_ctx (t); - if (!ctx->is_nested) - expand_omp_parallel (tp, ctx); + lower_omp_parallel (tp, ctx); break; case OMP_FOR: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_for (tp, ctx); + lower_omp_for (tp, ctx); break; case OMP_SECTIONS: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_sections (tp, ctx); + lower_omp_sections (tp, ctx); break; case OMP_SINGLE: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_single (tp, ctx); + lower_omp_single (tp, ctx); break; case OMP_MASTER: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_master (tp, ctx); + lower_omp_master (tp, ctx); break; case OMP_ORDERED: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_ordered (tp, ctx); + lower_omp_ordered (tp, ctx); break; case OMP_CRITICAL: ctx = maybe_lookup_ctx (t); gcc_assert (ctx); - expand_omp_critical (tp, ctx); + lower_omp_critical (tp, ctx); break; case VAR_DECL: if (ctx && DECL_HAS_VALUE_EXPR_P (t)) - expand_regimplify (tp, wi); + lower_regimplify (tp, wi); break; case ADDR_EXPR: if (ctx) - expand_regimplify (tp, wi); + lower_regimplify (tp, wi); break; case ARRAY_REF: @@ -3030,7 +4068,7 @@ expand_omp_1 (tree *tp, int *walk_subtrees, void *data) case COMPONENT_REF: case VIEW_CONVERT_EXPR: if (ctx) - expand_regimplify (tp, wi); + lower_regimplify (tp, wi); break; case INDIRECT_REF: @@ -3038,7 +4076,7 @@ expand_omp_1 (tree *tp, int *walk_subtrees, void *data) { wi->is_lhs = false; wi->val_only = true; - expand_regimplify (&TREE_OPERAND (t, 0), wi); + lower_regimplify (&TREE_OPERAND (t, 0), wi); } break; @@ -3052,12 +4090,12 @@ expand_omp_1 (tree *tp, int *walk_subtrees, void *data) } static void -expand_omp (tree *stmt_p, omp_context *ctx) +lower_omp (tree *stmt_p, omp_context *ctx) { struct walk_stmt_info wi; memset (&wi, 0, sizeof (wi)); - wi.callback = expand_omp_1; + wi.callback = lower_omp_1; wi.info = ctx; wi.val_only = true; wi.want_locations = true; @@ -3077,10 +4115,13 @@ execute_lower_omp (void) gcc_assert (parallel_nesting_level == 0); if (all_contexts->root) - expand_omp (&DECL_SAVED_TREE (current_function_decl), NULL); + lower_omp (&DECL_SAVED_TREE (current_function_decl), NULL); - splay_tree_delete (all_contexts); - all_contexts = NULL; + if (all_contexts) + { + splay_tree_delete (all_contexts); + all_contexts = NULL; + } } static bool @@ -3105,7 +4146,6 @@ struct tree_opt_pass pass_lower_omp = TODO_dump_func, /* todo_flags_finish */ 0 /* letter */ }; - /* The following is a utility to diagnose OpenMP structured block violations. It's part of the "omplower" pass, as that's invoked too late. It should diff --git a/gcc/passes.c b/gcc/passes.c index e2d18c9..8301590 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -481,6 +481,7 @@ init_optimization_passes (void) p = &all_passes; NEXT_PASS (pass_fixup_cfg); NEXT_PASS (pass_init_datastructures); + NEXT_PASS (pass_expand_omp); NEXT_PASS (pass_all_optimizations); NEXT_PASS (pass_warn_function_noreturn); NEXT_PASS (pass_mudflap_2); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ab86c53..bc23ccb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,20 @@ +2006-01-19 Diego Novillo <dnovillo@redhat.com> + + * testsuite/gcc.dg/gomp/for-13.c: Use -fdump-tree-ompexp. + * testsuite/gcc.dg/gomp/critical-1.c: Likewise. + * testsuite/gcc.dg/gomp/critical-3.c: Likewise. 
+ * testsuite/gcc.dg/gomp/empty.c: Likewise. + * testsuite/gcc.dg/gomp/ordered-1.c: Likewise. + * testsuite/gcc.dg/gomp/for-4.c: Likewise. + * testsuite/gcc.dg/gomp/for-6.c: Likewise. + * testsuite/gcc.dg/gomp/master-3.c: Likewise. + * testsuite/gcc.dg/gomp/for-8.c: Likewise. + * testsuite/gcc.dg/gomp/for-10.c: Likewise. + * testsuite/gcc.dg/gomp/for-18.c: Likewise. + * testsuite/gcc.dg/gomp/for-5.c: Likewise. + * testsuite/gcc.dg/gomp/for-7.c: Likewise. + * testsuite/gcc.dg/gomp/for-9.c: Likewise. + 2006-01-18 Jeff Law <law@redhat.com> * gcc.dg/tree-ssa/vrp25.c: New test. diff --git a/gcc/testsuite/gcc.dg/gomp/critical-1.c b/gcc/testsuite/gcc.dg/gomp/critical-1.c index bdc7bad..6f3348c 100644 --- a/gcc/testsuite/gcc.dg/gomp/critical-1.c +++ b/gcc/testsuite/gcc.dg/gomp/critical-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-omplower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -21,8 +21,8 @@ void foo (void) bar(3); } -/* { dg-final { scan-tree-dump-times "GOMP_critical_start" 2 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_critical_end" 2 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_critical_name_start" 2 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_critical_name_end" 2 "omplower" } } */ -/* { dg-final { cleanup-tree-dump "omplower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_critical_start" 2 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_critical_end" 2 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_critical_name_start" 2 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_critical_name_end" 2 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/critical-3.c b/gcc/testsuite/gcc.dg/gomp/critical-3.c index 9cd73ac..6726e6a 100644 --- a/gcc/testsuite/gcc.dg/gomp/critical-3.c +++ b/gcc/testsuite/gcc.dg/gomp/critical-3.c @@ -1,5 +1,5 @@ // { dg-do compile } -// { dg-options "-fopenmp -fdump-tree-omplower" } +// { dg-options "-fopenmp -fdump-tree-ompexp" } void bar(void); void foo(void) @@ -8,4 +8,4 @@ void foo(void) bar(); } -// { dg-final { scan-tree-dump-times "\\&\\.gomp_critical_user_xyzzy" 2 "omplower" } } +// { dg-final { scan-tree-dump-times "\\&\\.gomp_critical_user_xyzzy" 2 "ompexp" } } diff --git a/gcc/testsuite/gcc.dg/gomp/empty.c b/gcc/testsuite/gcc.dg/gomp/empty.c index 18af1d8..6a21c04 100644 --- a/gcc/testsuite/gcc.dg/gomp/empty.c +++ b/gcc/testsuite/gcc.dg/gomp/empty.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fopenmp -fdump-tree-omplower" } */ +/* { dg-options "-O -fopenmp -fdump-tree-ompexp" } */ main() { @@ -8,5 +8,5 @@ main() } /* There should not be a GOMP_parallel_start call. 
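The call pairs counted by critical-1.c and critical-3.c above come out of the expansion roughly as follows (an illustrative sketch using the real libgomp entry points; the compiler-internal lock for the named form is spelled .gomp_critical_user_xyzzy with a leading dot, written here without the dot to stay valid C):

extern void GOMP_critical_start (void);
extern void GOMP_critical_end (void);
extern void GOMP_critical_name_start (void **);
extern void GOMP_critical_name_end (void **);

static void *gomp_critical_user_xyzzy;   /* one lock pointer per name */

void
foo (void)
{
  GOMP_critical_start ();   /* #pragma omp critical */
  /* ... body ... */
  GOMP_critical_end ();

  GOMP_critical_name_start (&gomp_critical_user_xyzzy);   /* critical (xyzzy) */
  /* ... body ... */
  GOMP_critical_name_end (&gomp_critical_user_xyzzy);
}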
*/ -/* { dg-final { scan-tree-dump-times "GOMP_parallel_start" 0 "omplower"} } */ -/* { dg-final { cleanup-tree-dump "omplower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_parallel_start" 0 "ompexp"} } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-10.c b/gcc/testsuite/gcc.dg/gomp/for-10.c index 9dfac16..f214042 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-10.c +++ b/gcc/testsuite/gcc.dg/gomp/for-10.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_runtime_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_runtime_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_runtime_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_runtime_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-13.c b/gcc/testsuite/gcc.dg/gomp/for-13.c index 16e971f..607de49 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-13.c +++ b/gcc/testsuite/gcc.dg/gomp/for-13.c @@ -2,7 +2,7 @@ // for iteration variable as private. // { dg-do compile } -// { dg-options "-fopenmp -fdump-tree-lower" } +// { dg-options "-fopenmp -fdump-tree-ompexp" } extern void bar(int); void foo(void) @@ -14,5 +14,5 @@ void foo(void) bar(i); } -// { dg-final { scan-tree-dump-times "omp_data_o" 0 "lower" } } -// { dg-final { cleanup-tree-dump "lower" } } +// { dg-final { scan-tree-dump-times "omp_data_o" 0 "ompexp" } } +// { dg-final { cleanup-tree-dump "ompexp" } } diff --git a/gcc/testsuite/gcc.dg/gomp/for-18.c b/gcc/testsuite/gcc.dg/gomp/for-18.c index c875a0c..545f271 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-18.c +++ b/gcc/testsuite/gcc.dg/gomp/for-18.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O -fopenmp -fdump-tree-omplower" } */ +/* { dg-options "-O -fopenmp -fdump-tree-ompexp" } */ void foo (int *a, int i) @@ -37,6 +37,6 @@ bar (int *a, int i) a[j] = 4; } -/* { dg-final { scan-tree-dump-times "GOMP_parallel_loop_dynamic_start" 4 "omplower" { xfail *-*-* } } } */ -/* { dg-final { scan-tree-dump-times "GOMP_parallel_loop_guided_start" 4 "omplower" { xfail *-*-* } } } */ -/* { dg-final { cleanup-tree-dump "omplower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_parallel_loop_dynamic_start" 4 "ompexp" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump-times "GOMP_parallel_loop_guided_start" 4 "ompexp" { xfail *-*-* } } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-4.c b/gcc/testsuite/gcc.dg/gomp/for-4.c index c5f1bb8..fb6994e 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-4.c +++ b/gcc/testsuite/gcc.dg/gomp/for-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_dynamic_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_dynamic_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_dynamic_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_dynamic_next" 1 "ompexp" } } */ +/* { dg-final { 
cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-5.c b/gcc/testsuite/gcc.dg/gomp/for-5.c index 6d9722a..5912a4e 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-5.c +++ b/gcc/testsuite/gcc.dg/gomp/for-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_guided_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_guided_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_guided_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_guided_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-6.c b/gcc/testsuite/gcc.dg/gomp/for-6.c index 9361205..100ee2c 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-6.c +++ b/gcc/testsuite/gcc.dg/gomp/for-6.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_runtime_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_runtime_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_runtime_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_runtime_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-7.c b/gcc/testsuite/gcc.dg/gomp/for-7.c index b3eb997..10763dc 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-7.c +++ b/gcc/testsuite/gcc.dg/gomp/for-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_static_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_static_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_static_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_static_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-8.c b/gcc/testsuite/gcc.dg/gomp/for-8.c index c1386ce..1bc66c4 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-8.c +++ b/gcc/testsuite/gcc.dg/gomp/for-8.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_dynamic_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_dynamic_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_dynamic_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_dynamic_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/for-9.c b/gcc/testsuite/gcc.dg/gomp/for-9.c index 2a554d5..af99e21 100644 --- a/gcc/testsuite/gcc.dg/gomp/for-9.c 
+++ b/gcc/testsuite/gcc.dg/gomp/for-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-lower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -12,6 +12,6 @@ void foo (int n) bar(i); } -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_guided_start" 1 "lower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_guided_next" 1 "lower" } } */ -/* { dg-final { cleanup-tree-dump "lower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_guided_start" 1 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_loop_ordered_guided_next" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/master-3.c b/gcc/testsuite/gcc.dg/gomp/master-3.c index 3796610..fee09dd 100644 --- a/gcc/testsuite/gcc.dg/gomp/master-3.c +++ b/gcc/testsuite/gcc.dg/gomp/master-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-omplower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -9,5 +9,5 @@ void foo (void) bar(0); } -/* { dg-final { scan-tree-dump-times "omp_get_thread_num" 1 "omplower" } } */ -/* { dg-final { cleanup-tree-dump "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp_get_thread_num" 1 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/testsuite/gcc.dg/gomp/ordered-1.c b/gcc/testsuite/gcc.dg/gomp/ordered-1.c index a1cd7f4..de5e116 100644 --- a/gcc/testsuite/gcc.dg/gomp/ordered-1.c +++ b/gcc/testsuite/gcc.dg/gomp/ordered-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fopenmp -fdump-tree-omplower" } */ +/* { dg-options "-fopenmp -fdump-tree-ompexp" } */ extern void bar(int); @@ -15,6 +15,6 @@ void foo (void) } } -/* { dg-final { scan-tree-dump-times "GOMP_ordered_start" 2 "omplower" } } */ -/* { dg-final { scan-tree-dump-times "GOMP_ordered_end" 2 "omplower" } } */ -/* { dg-final { cleanup-tree-dump "omplower" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_ordered_start" 2 "ompexp" } } */ +/* { dg-final { scan-tree-dump-times "GOMP_ordered_end" 2 "ompexp" } } */ +/* { dg-final { cleanup-tree-dump "ompexp" } } */ diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index 45e78dd..f76f663 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -486,6 +486,37 @@ make_edges (void) } +/* Link an OMP_SECTIONS block to all the OMP_SECTION blocks in its body. */ + +static void +make_omp_sections_edges (basic_block bb) +{ + basic_block exit_bb; + size_t i, n; + tree vec, stmt; + + stmt = last_stmt (bb); + vec = OMP_SECTIONS_SECTIONS (stmt); + n = TREE_VEC_LENGTH (vec); + exit_bb = bb_for_stmt (TREE_VEC_ELT (vec, n - 1)); + + for (i = 0; i < n - 1; i += 2) + { + basic_block start_bb = bb_for_stmt (TREE_VEC_ELT (vec, i)); + basic_block end_bb = bb_for_stmt (TREE_VEC_ELT (vec, i + 1)); + make_edge (bb, start_bb, EDGE_ABNORMAL); + make_edge (end_bb, exit_bb, EDGE_FALLTHRU); + } + + /* Once the CFG has been built, the vector of sections is no longer + useful. The region can be easily obtained with build_omp_regions. + Furthermore, this sharing of tree expressions is not allowed by the + statement verifier. */ + OMP_SECTIONS_SECTIONS (stmt) = NULL_TREE; +} + + + /* Create edges for control statement at basic block BB. 
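For reference, the OMP_SECTIONS_SECTIONS vector walked by make_omp_sections_edges above stores the section blocks pairwise, with the exit block as its last element. A construct such as the following (illustrative; a_fn and b_fn are arbitrary callees) yields the vector [start_0, end_0, start_1, end_1, exit], an abnormal edge from the directive's block to each start_i, and a fallthru edge from each end_i to the exit block:

extern void a_fn (void);
extern void b_fn (void);

void
baz (void)
{
#pragma omp sections
  {
#pragma omp section
    a_fn ();        /* start_0 .. end_0 */
#pragma omp section
    b_fn ();        /* start_1 .. end_1 */
  }                 /* exit: last element of the vector */
}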
*/ static void @@ -581,6 +612,27 @@ make_exit_edges (basic_block bb) make_edge (bb, bb->next_bb, EDGE_FALLTHRU); break; + case OMP_PARALLEL: + case OMP_FOR: + case OMP_SINGLE: + case OMP_MASTER: + case OMP_ORDERED: + case OMP_CRITICAL: + make_edge (bb, bb->next_bb, EDGE_ABNORMAL); + + case OMP_RETURN_EXPR: + if (EDGE_COUNT (bb->succs) == 0) + make_edge (bb, bb->next_bb, EDGE_FALLTHRU); + break; + + case OMP_SECTIONS: + make_omp_sections_edges (bb); + break; + + case OMP_SECTION: + make_edge (bb, bb->next_bb, EDGE_FALLTHRU); + break; + default: gcc_unreachable (); } @@ -2503,6 +2555,10 @@ is_ctrl_altering_stmt (tree t) return true; } + /* OpenMP directives alter control flow. */ + if (flag_openmp && OMP_DIRECTIVE_P (t)) + return true; + /* If a statement can throw, it alters control flow. */ return tree_can_throw_internal (t); } @@ -2746,12 +2802,9 @@ set_bb_for_stmt (tree t, basic_block bb) stmt_ann_t ann = get_stmt_ann (t); ann->bb = bb; - /* If the statement is a label, add the label to block-to-labels - map so that we can speed up edge creation for GOTO_EXPRs. - Note that LABEL_TO_BLOCK_MAP may not exist if we are - currently expanding into RTL (in which case, this mapping is - unnecessary, anyway). */ - if (TREE_CODE (t) == LABEL_EXPR && !currently_expanding_to_rtl) + /* If the statement is a label, add the label to block-to-labels map + so that we can speed up edge creation for GOTO_EXPRs. */ + if (TREE_CODE (t) == LABEL_EXPR) { int uid; @@ -3432,6 +3485,17 @@ verify_stmt (tree stmt, bool last_in_block) { tree addr; + if (OMP_DIRECTIVE_P (stmt)) + { + /* OpenMP directives are validated by the FE and never operated + on by the optimizers. Furthermore, OMP_FOR may contain + non-gimple expressions when the main index variable has had + its address taken. This does not affect the loop itself + because the header of an OMP_FOR is merely used to determine + how to setup the parallel iteration. */ + return false; + } + if (!is_gimple_stmt (stmt)) { error ("is not a valid GIMPLE statement"); @@ -4494,6 +4558,329 @@ tree_duplicate_sese_region (edge entry, edge exit, return true; } +/* +DEF_VEC_P(basic_block); +DEF_VEC_ALLOC_P(basic_block,heap); +*/ + +/* Add all the blocks dominated by ENTRY to the array BBS_P. Stop + adding blocks when the dominator traversal reaches EXIT. This + function silently assumes that ENTRY strictly dominates EXIT. */ + +static void +gather_blocks_in_sese_region (basic_block entry, basic_block exit, + VEC(basic_block,heap) **bbs_p) +{ + basic_block son; + + for (son = first_dom_son (CDI_DOMINATORS, entry); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + { + VEC_safe_push (basic_block, heap, *bbs_p, son); + if (son != exit) + gather_blocks_in_sese_region (son, exit, bbs_p); + } +} + + +struct move_stmt_d +{ + tree block; + tree from_context; + tree to_context; + bitmap vars_to_remove; + bool remap_decls_p; +}; + +/* Helper for move_block_to_fn. Set TREE_BLOCK in every expression + contained in *TP and change the DECL_CONTEXT of every local + variable referenced in *TP. */ + +static tree +move_stmt_r (tree *tp, int *walk_subtrees ATTRIBUTE_UNUSED, void *data) +{ + struct move_stmt_d *p = (struct move_stmt_d *) data; + + if (p->block && IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE (*tp)))) + TREE_BLOCK (*tp) = p->block; + + if (OMP_DIRECTIVE_P (*tp)) + { + /* Do not remap variables inside OMP directives. Variables + referenced in clauses and directive header belong to the + parent function and should not be moved into the child + function. 
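A sketch of the situation this guard protects against (hypothetical source):

void
parent (void)
{
  int x = 0;
  /* OMP_CLAUSE_SHARED <x> in the clause list names the parent's x, so
     x must keep DECL_CONTEXT == parent even though the statements of
     the region body are being rehomed into the child function.  */
#pragma omp parallel shared (x)
  x++;
}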
*/ + p->remap_decls_p = false; + } + + if (p->remap_decls_p + && DECL_P (*tp) + && DECL_CONTEXT (*tp) == p->from_context) + { + DECL_CONTEXT (*tp) = p->to_context; + + if (TREE_CODE (*tp) == VAR_DECL) + { + struct function *f = DECL_STRUCT_FUNCTION (p->to_context); + f->unexpanded_var_list = tree_cons (0, *tp, f->unexpanded_var_list); + + /* Mark *TP to be removed from the original function, + otherwise it will be given a DECL_RTL when the original + function is expanded. */ + bitmap_set_bit (p->vars_to_remove, DECL_UID (*tp)); + } + } + + return NULL_TREE; +} + + +/* Move basic block BB from function CFUN to function DEST_FN. The + block is moved out of the original linked list and placed after + block AFTER in the new list. Also, the block is removed from the + original array of blocks and placed in DEST_FN's array of blocks. + If UPDATE_EDGE_COUNT_P is true, the edge counts on both CFGs are + updated to reflect the moved edges. + + On exit, local variables that need to be removed from + CFUN->UNEXPANDED_VAR_LIST will have been added to VARS_TO_REMOVE. */ + +static void +move_block_to_fn (struct function *dest_cfun, basic_block bb, + basic_block after, bool update_edge_count_p, + bitmap vars_to_remove) +{ + struct control_flow_graph *cfg; + edge_iterator ei; + edge e; + block_stmt_iterator si; + struct move_stmt_d d; + unsigned sz; + + /* Link BB to the new linked list. */ + move_block_after (bb, after); + + /* Update the edge count in the corresponding flowgraphs. */ + if (update_edge_count_p) + FOR_EACH_EDGE (e, ei, bb->succs) + { + cfun->cfg->x_n_edges--; + dest_cfun->cfg->x_n_edges++; + } + + /* Remove BB from the original basic block array. */ + VEC_replace (basic_block, cfun->cfg->x_basic_block_info, bb->index, NULL); + cfun->cfg->x_n_basic_blocks--; + + /* Grow DEST_CFUN's basic block array if needed. */ + cfg = dest_cfun->cfg; + cfg->x_n_basic_blocks++; + if (bb->index > cfg->x_last_basic_block) + cfg->x_last_basic_block = bb->index; + + sz = VEC_length (basic_block, cfg->x_basic_block_info); + if ((unsigned) cfg->x_last_basic_block >= sz) + { + sz = cfg->x_last_basic_block + (cfg->x_last_basic_block + 3) / 4; + VEC_safe_grow (basic_block, gc, cfg->x_basic_block_info, sz); + } + + VEC_replace (basic_block, cfg->x_basic_block_info, + cfg->x_last_basic_block, bb); + + /* The statements in BB need to be associated with a new TREE_BLOCK. + Labels need to be associated with a new label-to-block map.
*/ + memset (&d, 0, sizeof (d)); + d.vars_to_remove = vars_to_remove; + + for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si)) + { + tree stmt = bsi_stmt (si); + + d.from_context = cfun->decl; + d.to_context = dest_cfun->decl; + d.remap_decls_p = true; + if (TREE_BLOCK (stmt)) + d.block = DECL_INITIAL (dest_cfun->decl); + + walk_tree (&stmt, move_stmt_r, &d, NULL); + + if (TREE_CODE (stmt) == LABEL_EXPR) + { + unsigned old_len; + tree label = LABEL_EXPR_LABEL (stmt); + int uid = LABEL_DECL_UID (label); + + gcc_assert (uid > -1); + + old_len = VEC_length (basic_block, cfg->x_label_to_block_map); + if (old_len <= (unsigned) uid) + { + basic_block *addr; + unsigned new_len = 3 * uid / 2; + VEC_safe_grow (basic_block, gc, cfg->x_label_to_block_map, + new_len); + addr = VEC_address (basic_block, cfg->x_label_to_block_map); + memset (&addr[old_len], 0, + sizeof (basic_block) * (new_len - old_len)); + } + + VEC_replace (basic_block, cfg->x_label_to_block_map, uid, bb); + VEC_replace (basic_block, cfun->cfg->x_label_to_block_map, uid, NULL); + + gcc_assert (DECL_CONTEXT (label) == dest_cfun->decl); + + if (uid >= dest_cfun->last_label_uid) + dest_cfun->last_label_uid = uid + 1; + } + } +} + + +/* Move a single-entry, single-exit region delimited by ENTRY_BB and + EXIT_BB to function DEST_CFUN. The whole region is replaced by a + single basic block in the original CFG and the new basic block is + returned. DEST_CFUN must not have a CFG yet. + + Note that the region need not be a pure SESE region. Blocks inside + the region may contain calls to abort/exit. The only restriction + is that ENTRY_BB should be the only entry point and it must + dominate EXIT_BB. + + All local variables referenced in the region are assumed to be in + the corresponding BLOCK_VARS and unexpanded variable lists + associated with DEST_CFUN. */ + +basic_block +move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb, + basic_block exit_bb) +{ + VEC(basic_block,heap) *bbs; + basic_block after, bb, *entry_pred, *exit_succ; + struct function *saved_cfun; + int *entry_flag, *exit_flag; + unsigned i, num_entry_edges, num_exit_edges; + edge e; + edge_iterator ei; + bitmap vars_to_remove; + + saved_cfun = cfun; + + /* Collect all the blocks in the region. Manually add ENTRY_BB + because it won't be added by dfs_enumerate_from. */ + calculate_dominance_info (CDI_DOMINATORS); + + /* If ENTRY does not strictly dominate EXIT, this cannot be an SESE + region. */ + gcc_assert (entry_bb != exit_bb + && dominated_by_p (CDI_DOMINATORS, exit_bb, entry_bb)); + + bbs = NULL; + VEC_safe_push (basic_block, heap, bbs, entry_bb); + gather_blocks_in_sese_region (entry_bb, exit_bb, &bbs); + + /* Detach ENTRY_BB and EXIT_BB from CFUN->CFG. We need to remember + the predecessor edges to ENTRY_BB and the successor edges to + EXIT_BB so that we can re-attach them to the new basic block that + will replace the region. 
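Per the ChangeLog, expand_omp_parallel is the intended caller of this function. A sketch of the call pattern under that assumption (the local variable names here are illustrative, not from the patch):

  /* In expand_omp_parallel: ENTRY_BB contains the OMP_PARALLEL
     statement, EXIT_BB the matching OMP_RETURN_EXPR.  */
  tree child_fn = OMP_PARALLEL_FN (stmt);
  struct function *child_cfun = DECL_STRUCT_FUNCTION (child_fn);
  basic_block new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb);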
*/ + num_entry_edges = EDGE_COUNT (entry_bb->preds); + entry_pred = (basic_block *) xcalloc (num_entry_edges, sizeof (basic_block)); + entry_flag = (int *) xcalloc (num_entry_edges, sizeof (int)); + i = 0; + for (ei = ei_start (entry_bb->preds); (e = ei_safe_edge (ei)) != NULL;) + { + entry_flag[i] = e->flags; + entry_pred[i++] = e->src; + remove_edge (e); + } + + num_exit_edges = EDGE_COUNT (exit_bb->succs); + exit_succ = (basic_block *) xcalloc (num_exit_edges, sizeof (basic_block)); + exit_flag = (int *) xcalloc (num_exit_edges, sizeof (int)); + i = 0; + for (ei = ei_start (exit_bb->succs); (e = ei_safe_edge (ei)) != NULL;) + { + exit_flag[i] = e->flags; + exit_succ[i++] = e->dest; + remove_edge (e); + } + + /* Switch context to the child function to initialize DEST_FN's CFG. */ + gcc_assert (dest_cfun->cfg == NULL); + cfun = dest_cfun; + init_empty_tree_cfg (); + cfun = saved_cfun; + + /* Move blocks from BBS into DEST_CFUN. */ + gcc_assert (VEC_length (basic_block, bbs) >= 2); + after = dest_cfun->cfg->x_entry_block_ptr; + vars_to_remove = BITMAP_ALLOC (NULL); + for (i = 0; VEC_iterate (basic_block, bbs, i, bb); i++) + { + /* No need to update edge counts on the last block. They have + already been updated earlier when we detached the region from + the original CFG. */ + move_block_to_fn (dest_cfun, bb, after, bb != exit_bb, vars_to_remove); + after = bb; + } + + /* Remove the variables marked in VARS_TO_REMOVE from + CFUN->UNEXPANDED_VAR_LIST. Otherwise, they will be given a + DECL_RTL in the context of CFUN. */ + if (!bitmap_empty_p (vars_to_remove)) + { + tree *p; + + for (p = &cfun->unexpanded_var_list; *p; ) + { + tree var = TREE_VALUE (*p); + if (bitmap_bit_p (vars_to_remove, DECL_UID (var))) + { + *p = TREE_CHAIN (*p); + continue; + } + + p = &TREE_CHAIN (*p); + } + } + + BITMAP_FREE (vars_to_remove); + + /* Rewire the entry and exit blocks. ENTRY_BB becomes the successor + of DEST_FN's ENTRY_BLOCK_PTR and EXIT_BB becomes the predecessor + of DEST_FN's EXIT_BLOCK_PTR. We need to switch CFUN between + DEST_CFUN and SAVED_CFUN so that the various CFG manipulation + functions get to the right CFG. + + FIXME, this is silly. The CFG ought to become a parameter to + these helpers. */ + cfun = dest_cfun; + make_edge (ENTRY_BLOCK_PTR, entry_bb, EDGE_FALLTHRU); + make_edge (exit_bb, EXIT_BLOCK_PTR, 0); + cfun = saved_cfun; + + /* Back in the original function, the SESE region has disappeared, + create a new basic block in its place.
*/ + bb = create_empty_bb (entry_pred[0]); + for (i = 0; i < num_entry_edges; i++) + make_edge (entry_pred[i], bb, entry_flag[i]); + + for (i = 0; i < num_exit_edges; i++) + make_edge (bb, exit_succ[i], exit_flag[i]); + + free (exit_flag); + free (entry_flag); + free (entry_pred); + free (exit_succ); + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + VEC_free (basic_block, heap, bbs); + + return bb; +} + /* Dump FUNCTION_DECL FN to file FILE using FLAGS (see TDF_* in tree.h) */ diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h index a766e39..0c5e96b 100644 --- a/gcc/tree-flow.h +++ b/gcc/tree-flow.h @@ -540,6 +540,8 @@ extern void fold_cond_expr_cond (void); extern void replace_uses_by (tree, tree); extern void start_recording_case_labels (void); extern void end_recording_case_labels (void); +extern basic_block move_sese_region_to_fn (struct function *, basic_block, + basic_block); /* In tree-cfgcleanup.c */ extern bool cleanup_tree_cfg (void); @@ -580,8 +582,9 @@ extern void remove_phi_node (tree, tree); extern tree phi_reverse (tree); /* In gimple-low.c */ +extern void record_vars_into (tree, tree); extern void record_vars (tree); -extern bool block_may_fallthru (tree block); +extern bool block_may_fallthru (tree); /* In tree-ssa-alias.c */ extern void dump_may_aliases_for (FILE *, tree); @@ -886,6 +889,7 @@ tree create_mem_ref (block_stmt_iterator *, tree, rtx addr_for_mem_ref (struct mem_address *, bool); void get_address_description (tree, struct mem_address *); tree maybe_fold_tmr (tree); + /* This structure is simply used during pushing fields onto the fieldstack to track the offset of the field, since bitpos_of_field gives it relative to its immediate containing type, and we want it relative to the ultimate diff --git a/gcc/tree-gimple.c b/gcc/tree-gimple.c index 5edf558..b47b001 100644 --- a/gcc/tree-gimple.c +++ b/gcc/tree-gimple.c @@ -224,6 +224,7 @@ is_gimple_stmt (tree t) case OMP_MASTER: case OMP_ORDERED: case OMP_CRITICAL: + case OMP_RETURN_EXPR: /* These are always void. */ return true; diff --git a/gcc/tree-gimple.h b/gcc/tree-gimple.h index ff1a6d2..9cba07f 100644 --- a/gcc/tree-gimple.h +++ b/gcc/tree-gimple.h @@ -109,21 +109,6 @@ enum gimplify_status { GS_ALL_DONE = 1 /* The expression is fully gimplified. */ }; -/* Type of parallel constructs. Used to decide what runtime function - to use for launching children threads and the gimplification - strategy. */ - -enum omp_parallel_type { - IS_NOT_PARALLEL = 0, - - /* Regular omp parallel */ - IS_PARALLEL, - - /* Combined parallel + workshare (parallel loop and parallel - sections). */ - IS_COMBINED_PARALLEL -}; - extern enum gimplify_status gimplify_expr (tree *, tree *, tree *, bool (*) (tree), fallback_t); extern void gimplify_type_sizes (tree, tree *); @@ -147,12 +132,11 @@ extern tree force_labels_r (tree *, int *, void *); extern enum gimplify_status gimplify_va_arg_expr (tree *, tree *, tree *); struct gimplify_omp_ctx; extern void omp_firstprivatize_variable (struct gimplify_omp_ctx *, tree); +extern tree gimple_boolify (tree); /* In omp-low.c. */ -extern tree find_omp_clause (tree, enum tree_code); extern void diagnose_omp_structured_block_errors (tree); extern tree omp_reduction_init (tree, tree); -enum omp_parallel_type determine_parallel_type (tree stmt); /* In tree-nested.c. 
*/ extern void lower_nested_functions (tree); diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 9a93427..c75cc9b 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -1598,6 +1598,29 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data) case LOOP_EXPR: case PHI_NODE: case WITH_SIZE_EXPR: + case OMP_PARALLEL: + case OMP_FOR: + case OMP_SECTIONS: + case OMP_SINGLE: + case OMP_SECTION: + case OMP_MASTER: + case OMP_ORDERED: + case OMP_CRITICAL: + case OMP_ATOMIC: + case OMP_CLAUSE_PRIVATE: + case OMP_CLAUSE_SHARED: + case OMP_CLAUSE_FIRSTPRIVATE: + case OMP_CLAUSE_LASTPRIVATE: + case OMP_CLAUSE_REDUCTION: + case OMP_CLAUSE_COPYIN: + case OMP_CLAUSE_COPYPRIVATE: + case OMP_CLAUSE_IF: + case OMP_CLAUSE_NUM_THREADS: + case OMP_CLAUSE_SCHEDULE: + case OMP_CLAUSE_NOWAIT: + case OMP_CLAUSE_ORDERED: + case OMP_CLAUSE_DEFAULT: + case OMP_RETURN_EXPR: break; /* We don't account constants for now. Assume that the cost is amortized @@ -2285,7 +2308,22 @@ copy_tree_r (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) /* Now, restore the chain, if appropriate. That will cause walk_tree to walk into the chain as well. */ - if (code == PARM_DECL || code == TREE_LIST) + if (code == PARM_DECL + || code == TREE_LIST + /* OpenMP clauses are linked through TREE_CHAIN. */ + || code == OMP_CLAUSE_PRIVATE + || code == OMP_CLAUSE_SHARED + || code == OMP_CLAUSE_FIRSTPRIVATE + || code == OMP_CLAUSE_LASTPRIVATE + || code == OMP_CLAUSE_REDUCTION + || code == OMP_CLAUSE_COPYIN + || code == OMP_CLAUSE_COPYPRIVATE + || code == OMP_CLAUSE_IF + || code == OMP_CLAUSE_NUM_THREADS + || code == OMP_CLAUSE_SCHEDULE + || code == OMP_CLAUSE_NOWAIT + || code == OMP_CLAUSE_ORDERED + || code == OMP_CLAUSE_DEFAULT) TREE_CHAIN (*tp) = chain; /* For now, we don't update BLOCKs when we make copies. So, we diff --git a/gcc/tree-iterator.c b/gcc/tree-iterator.c index c4c3010..ad2b47e 100644 --- a/gcc/tree-iterator.c +++ b/gcc/tree-iterator.c @@ -40,6 +40,7 @@ alloc_stmt_list (void) if (list) { stmt_list_cache = TREE_CHAIN (list); + gcc_assert (stmt_list_cache != list); memset (list, 0, sizeof(struct tree_common)); TREE_SET_CODE (list, STATEMENT_LIST); } @@ -54,6 +55,9 @@ free_stmt_list (tree t) { gcc_assert (!STATEMENT_LIST_HEAD (t)); gcc_assert (!STATEMENT_LIST_TAIL (t)); + /* If this triggers, it's a sign that the same list is being freed + twice. 
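To see why both new assertions are needed, note that the cache is an intrusive free list threaded through TREE_CHAIN, so a double free is exactly what would create a one-node cycle (a hypothetical sequence):

  free_stmt_list (t);          /* stmt_list_cache = t */
  free_stmt_list (t);          /* caught here: t == stmt_list_cache */
  list = alloc_stmt_list ();   /* otherwise TREE_CHAIN (t) == t, and the
                                  matching assert in alloc_stmt_list above
                                  traps the self-referential cache */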
*/ + gcc_assert (t != stmt_list_cache || stmt_list_cache == NULL); TREE_CHAIN (t) = stmt_list_cache; stmt_list_cache = t; } diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 90327ba..82e8c10 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -263,6 +263,7 @@ extern struct tree_opt_pass pass_lower_complex; extern struct tree_opt_pass pass_lower_vector; extern struct tree_opt_pass pass_lower_vector_ssa; extern struct tree_opt_pass pass_lower_omp; +extern struct tree_opt_pass pass_expand_omp; extern struct tree_opt_pass pass_object_sizes; extern struct tree_opt_pass pass_fold_builtins; extern struct tree_opt_pass pass_stdarg; diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 4e50b8d..b23416d 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1702,6 +1702,21 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, case OMP_PARALLEL: pp_string (buffer, "#pragma omp parallel"); dump_omp_clauses (buffer, OMP_PARALLEL_CLAUSES (node), spc, flags); + if (OMP_PARALLEL_FN (node)) + { + pp_string (buffer, " [child fn: "); + dump_generic_node (buffer, OMP_PARALLEL_FN (node), spc, flags, false); + + pp_string (buffer, " ("); + + if (OMP_PARALLEL_DATA_ARG (node)) + dump_generic_node (buffer, OMP_PARALLEL_DATA_ARG (node), spc, flags, + false); + else + pp_string (buffer, "???"); + + pp_string (buffer, ")]"); + } dump_omp_body: if (!(flags & TDF_SLIM) && OMP_BODY (node)) @@ -1803,6 +1818,11 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags, dump_omp_clauses (buffer, OMP_SINGLE_CLAUSES (node), spc, flags); goto dump_omp_body; + case OMP_RETURN_EXPR: + pp_string (buffer, "OMP_RETURN"); + is_expr = false; + break; + case REDUC_MAX_EXPR: pp_string (buffer, " REDUC_MAX_EXPR < "); dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false); diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c index e3b95e7..fbee0b9 100644 --- a/gcc/tree-ssa-operands.c +++ b/gcc/tree-ssa-operands.c @@ -1288,6 +1288,14 @@ get_expr_operands (tree stmt, tree *expr_p, int flags) case FILTER_EXPR: case LABEL_DECL: case CONST_DECL: + case OMP_PARALLEL: + case OMP_SECTIONS: + case OMP_FOR: + case OMP_RETURN_EXPR: + case OMP_SINGLE: + case OMP_MASTER: + case OMP_ORDERED: + case OMP_CRITICAL: /* Expressions that make no memory references. */ return; diff --git a/gcc/tree.def b/gcc/tree.def index 3cd03fd..f99b247 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -65,7 +65,6 @@ DEFTREECODE (TREE_VEC, "tree_vec", tcc_exceptional, 0) For a block which represents the outermost scope of a function, it points to the FUNCTION_DECL node. BLOCK_VARS points to a chain of decl nodes. - BLOCK_TYPE_TAGS points to a chain of types which have their own names. BLOCK_CHAIN points to the next BLOCK at the same level. BLOCK_ABSTRACT_ORIGIN points to the original (abstract) tree node which this block is an instance of, or else is NULL to indicate that this @@ -957,8 +956,15 @@ DEFTREECODE (TARGET_MEM_REF, "target_mem_ref", tcc_reference, 7) exposed to TREE_RANGE_CHECK. */ /* OpenMP - #pragma omp parallel [clause1 ... clauseN] Operand 0: OMP_PARALLEL_BODY: Code to be executed by all threads. - Operand 1: OMP_PARALLEL_CLAUSES: List of clauses. */ -DEFTREECODE (OMP_PARALLEL, "omp_parallel", tcc_statement, 2) + Operand 1: OMP_PARALLEL_CLAUSES: List of clauses. + Operand 2: OMP_PARALLEL_FN: FUNCTION_DECL used when outlining the + body of the parallel region. Only valid after + pass_lower_omp. 
+ Operand 3: OMP_PARALLEL_DATA_ARG: Local variable in the parent + function containing data to be shared with the child + function. */ + +DEFTREECODE (OMP_PARALLEL, "omp_parallel", tcc_statement, 4) /* OpenMP - #pragma omp for [clause1 ... clauseN] Operand 0: OMP_FOR_BODY: Loop body. @@ -983,8 +989,11 @@ DEFTREECODE (OMP_FOR, "omp_for", tcc_statement, 6) /* OpenMP - #pragma omp sections [clause1 ... clauseN] Operand 0: OMP_SECTIONS_BODY: Sections body. - Operand 1: OMP_SECTIONS_CLAUSES: List of clauses. */ -DEFTREECODE (OMP_SECTIONS, "omp_sections", tcc_statement, 2) + Operand 1: OMP_SECTIONS_CLAUSES: List of clauses. + Operand 2: OMP_SECTIONS_SECTIONS: Vector of the different sections + in the body. Only valid after lowering and destroyed + after the CFG has been built. */ +DEFTREECODE (OMP_SECTIONS, "omp_sections", tcc_statement, 3) /* OpenMP - #pragma omp single Operand 0: OMP_SINGLE_BODY: Single section body. @@ -1063,6 +1072,9 @@ DEFTREECODE (OMP_CLAUSE_ORDERED, "ordered", tcc_expression, 0) /* OpenMP clause: default. */ DEFTREECODE (OMP_CLAUSE_DEFAULT, "default", tcc_expression, 0) +/* Return from an OpenMP directive. */ +DEFTREECODE (OMP_RETURN_EXPR, "omp_return", tcc_statement, 0) + /* Reduction operations. Operations that take a vector of elements and "reduce" it to a scalar result (e.g. summing the elements of the vector, finding the minimum over diff --git a/gcc/tree.h b/gcc/tree.h --- a/gcc/tree.h +++ b/gcc/tree.h @@ -166,6 +166,19 @@ extern const enum tree_code_class tree_code_type[]; #define EXPR_P(NODE) IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE (NODE))) +/* Returns nonzero iff NODE is an OpenMP directive. */ + +#define OMP_DIRECTIVE_P(NODE) \ + (TREE_CODE (NODE) == OMP_PARALLEL \ + || TREE_CODE (NODE) == OMP_SECTIONS \ + || TREE_CODE (NODE) == OMP_SECTION \ + || TREE_CODE (NODE) == OMP_FOR \ + || TREE_CODE (NODE) == OMP_RETURN_EXPR \ + || TREE_CODE (NODE) == OMP_SINGLE \ + || TREE_CODE (NODE) == OMP_MASTER \ + || TREE_CODE (NODE) == OMP_ORDERED \ + || TREE_CODE (NODE) == OMP_CRITICAL) + /* Number of argument-words in each kind of tree-node. */ extern const unsigned char tree_code_length[]; @@ -1424,6 +1437,8 @@ struct tree_constructor GTY(()) #define OMP_PARALLEL_BODY(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 0) #define OMP_PARALLEL_CLAUSES(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 1) +#define OMP_PARALLEL_FN(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 2) +#define OMP_PARALLEL_DATA_ARG(NODE) TREE_OPERAND (OMP_PARALLEL_CHECK (NODE), 3) #define OMP_FOR_BODY(NODE) TREE_OPERAND (OMP_FOR_CHECK (NODE), 0) #define OMP_FOR_CLAUSES(NODE) TREE_OPERAND (OMP_FOR_CHECK (NODE), 1) @@ -1434,6 +1449,7 @@ struct tree_constructor GTY(()) #define OMP_SECTIONS_BODY(NODE) TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 0) #define OMP_SECTIONS_CLAUSES(NODE) TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 1) +#define OMP_SECTIONS_SECTIONS(NODE) TREE_OPERAND (OMP_SECTIONS_CHECK (NODE), 2) #define OMP_SECTION_BODY(NODE) TREE_OPERAND (OMP_SECTION_CHECK (NODE), 0)
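A sketch of how client code can use the new operands (an illustrative fragment in the style of the dump_generic_node change above; print_generic_expr and dump_file are the usual tree-dumping facilities):

  if (OMP_DIRECTIVE_P (stmt) && TREE_CODE (stmt) == OMP_PARALLEL)
    {
      tree child = OMP_PARALLEL_FN (stmt);       /* FUNCTION_DECL set by
                                                    scan_omp_parallel */
      tree data = OMP_PARALLEL_DATA_ARG (stmt);  /* .omp_data_o local set by
                                                    lower_omp_parallel */
      if (child)
        print_generic_expr (dump_file, child, 0);
      if (data)
        print_generic_expr (dump_file, data, 0);
    }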