Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r-- | gcc/omp-expand.c | 8195 |
1 file changed, 8195 insertions, 0 deletions
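The pass added below outlines the bodies of certain OMP directives into child functions and replaces the directives with calls into the runtime library (libgomp). As a rough orientation before the diff, here is a hand-written sketch of that transformation for `#pragma omp parallel` (not compiler output; the struct and function names are made up to mirror the `.omp_data_s` / `bar.omp_fn.0` conventions used in the comments, and `GOMP_parallel` is the libgomp entry point that `expand_parallel_call` emits):

/* Hedged illustration only: roughly what this pass does to
   `#pragma omp parallel`, written out by hand.  Build with
   `gcc sketch.c -lgomp` (assumes libgomp is installed).  */
#include <stdio.h>

/* libgomp entry point targeted by expand_parallel_call ().  */
extern void GOMP_parallel (void (*fn) (void *), void *data,
			   unsigned num_threads, unsigned flags);

/* Stand-in for the .omp_data_s record that carries shared state
   from the parent into the outlined body.  */
struct omp_data_s
{
  int x;
};

/* Stand-in for the outlined child function (the comments below
   refer to such functions as e.g. bar.omp_fn.0).  */
static void
foo_omp_fn_0 (void *data)
{
  struct omp_data_s *omp_data_i = (struct omp_data_s *) data;
  printf ("thread sees x = %d\n", omp_data_i->x);
}

int
main (void)
{
  /* Original user code would be:
       int x = 42;
       #pragma omp parallel shared (x)
	 printf ("thread sees x = %d\n", x);
     After expansion it becomes, in effect:  */
  struct omp_data_s omp_data_o;
  omp_data_o.x = 42;
  /* num_threads == 0 means "selected at run time", flags == 0.  */
  GOMP_parallel (foo_omp_fn_0, &omp_data_o, 0, 0);
  return 0;
}

With num_threads passed as zero the runtime picks the thread count, which matches the default described in expand_parallel_call below.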
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
new file mode 100644
index 0000000..a953c8b
--- /dev/null
+++ b/gcc/omp-expand.c
@@ -0,0 +1,8195 @@
+/* Expansion pass for OMP directives.  Outlines regions of certain OMP
+   directives to separate functions, converts others into explicit calls to the
+   runtime library (libgomp) and so forth
+
+Copyright (C) 2005-2016 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "memmodel.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "optabs.h"
+#include "cgraph.h"
+#include "pretty-print.h"
+#include "diagnostic-core.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "cfganal.h"
+#include "internal-fn.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimple-walk.h"
+#include "tree-cfg.h"
+#include "tree-into-ssa.h"
+#include "tree-ssa.h"
+#include "splay-tree.h"
+#include "cfgloop.h"
+#include "omp-general.h"
+#include "omp-offload.h"
+#include "tree-cfgcleanup.h"
+#include "symbol-summary.h"
+#include "cilk.h"
+#include "gomp-constants.h"
+#include "gimple-pretty-print.h"
+#include "hsa.h"
+
+
+/* OMP region information.  Every parallel and workshare
+   directive is enclosed between two markers, the OMP_* directive
+   and a corresponding GIMPLE_OMP_RETURN statement.  */
+
+struct omp_region
+{
+  /* The enclosing region.  */
+  struct omp_region *outer;
+
+  /* First child region.  */
+  struct omp_region *inner;
+
+  /* Next peer region.  */
+  struct omp_region *next;
+
+  /* Block containing the omp directive as its last stmt.  */
+  basic_block entry;
+
+  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
+  basic_block exit;
+
+  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
+  basic_block cont;
+
+  /* If this is a combined parallel+workshare region, this is a list
+     of additional arguments needed by the combined parallel+workshare
+     library call.  */
+  vec<tree, va_gc> *ws_args;
+
+  /* The code for the omp directive of this region.  */
+  enum gimple_code type;
+
+  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
+  enum omp_clause_schedule_kind sched_kind;
+
+  /* Schedule modifiers.  */
+  unsigned char sched_modifiers;
+
+  /* True if this is a combined parallel+workshare region.  */
+  bool is_combined_parallel;
+
+  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
+     a depend clause.
*/ + gomp_ordered *ord_stmt; +}; + +static struct omp_region *root_omp_region; +static bool omp_any_child_fn_dumped; + +static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, + bool = false); +static gphi *find_phi_with_arg_on_edge (tree, edge); +static void expand_omp (struct omp_region *region); + +/* Return true if REGION is a combined parallel+workshare region. */ + +static inline bool +is_combined_parallel (struct omp_region *region) +{ + return region->is_combined_parallel; +} + +/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB + is the immediate dominator of PAR_ENTRY_BB, return true if there + are no data dependencies that would prevent expanding the parallel + directive at PAR_ENTRY_BB as a combined parallel+workshare region. + + When expanding a combined parallel+workshare region, the call to + the child function may need additional arguments in the case of + GIMPLE_OMP_FOR regions. In some cases, these arguments are + computed out of variables passed in from the parent to the child + via 'struct .omp_data_s'. For instance: + + #pragma omp parallel for schedule (guided, i * 4) + for (j ...) + + Is lowered into: + + # BLOCK 2 (PAR_ENTRY_BB) + .omp_data_o.i = i; + #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) + + # BLOCK 3 (WS_ENTRY_BB) + .omp_data_i = &.omp_data_o; + D.1667 = .omp_data_i->i; + D.1598 = D.1667 * 4; + #pragma omp for schedule (guided, D.1598) + + When we outline the parallel region, the call to the child function + 'bar.omp_fn.0' will need the value D.1598 in its argument list, but + that value is computed *after* the call site. So, in principle we + cannot do the transformation. + + To see whether the code in WS_ENTRY_BB blocks the combined + parallel+workshare call, we collect all the variables used in the + GIMPLE_OMP_FOR header check whether they appear on the LHS of any + statement in WS_ENTRY_BB. If so, then we cannot emit the combined + call. + + FIXME. If we had the SSA form built at this point, we could merely + hoist the code in block 3 into block 2 and be done with it. But at + this point we don't have dataflow information and though we could + hack something up here, it is really not worth the aggravation. */ + +static bool +workshare_safe_to_combine_p (basic_block ws_entry_bb) +{ + struct omp_for_data fd; + gimple *ws_stmt = last_stmt (ws_entry_bb); + + if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) + return true; + + gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); + + omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); + + if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) + return false; + if (fd.iter_type != long_integer_type_node) + return false; + + /* FIXME. We give up too easily here. If any of these arguments + are not constants, they will likely involve variables that have + been mapped into fields of .omp_data_s for sharing with the child + function. With appropriate data flow, it would be possible to + see through this. */ + if (!is_gimple_min_invariant (fd.loop.n1) + || !is_gimple_min_invariant (fd.loop.n2) + || !is_gimple_min_invariant (fd.loop.step) + || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) + return false; + + return true; +} + +/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier + presence (SIMD_SCHEDULE). 
*/ + +static tree +omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) +{ + if (!simd_schedule) + return chunk_size; + + int vf = omp_max_vf (); + if (vf == 1) + return chunk_size; + + tree type = TREE_TYPE (chunk_size); + chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, + build_int_cst (type, vf - 1)); + return fold_build2 (BIT_AND_EXPR, type, chunk_size, + build_int_cst (type, -vf)); +} + +/* Collect additional arguments needed to emit a combined + parallel+workshare call. WS_STMT is the workshare directive being + expanded. */ + +static vec<tree, va_gc> * +get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) +{ + tree t; + location_t loc = gimple_location (ws_stmt); + vec<tree, va_gc> *ws_args; + + if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) + { + struct omp_for_data fd; + tree n1, n2; + + omp_extract_for_data (for_stmt, &fd, NULL); + n1 = fd.loop.n1; + n2 = fd.loop.n2; + + if (gimple_omp_for_combined_into_p (for_stmt)) + { + tree innerc + = omp_find_clause (gimple_omp_parallel_clauses (par_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + + vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); + + t = fold_convert_loc (loc, long_integer_type_node, n1); + ws_args->quick_push (t); + + t = fold_convert_loc (loc, long_integer_type_node, n2); + ws_args->quick_push (t); + + t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); + ws_args->quick_push (t); + + if (fd.chunk_size) + { + t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); + t = omp_adjust_chunk_size (t, fd.simd_schedule); + ws_args->quick_push (t); + } + + return ws_args; + } + else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) + { + /* Number of sections is equal to the number of edges from the + GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to + the exit of the sections region. */ + basic_block bb = single_succ (gimple_bb (ws_stmt)); + t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); + vec_alloc (ws_args, 1); + ws_args->quick_push (t); + return ws_args; + } + + gcc_unreachable (); +} + +/* Discover whether REGION is a combined parallel+workshare region. */ + +static void +determine_parallel_type (struct omp_region *region) +{ + basic_block par_entry_bb, par_exit_bb; + basic_block ws_entry_bb, ws_exit_bb; + + if (region == NULL || region->inner == NULL + || region->exit == NULL || region->inner->exit == NULL + || region->inner->cont == NULL) + return; + + /* We only support parallel+for and parallel+sections. */ + if (region->type != GIMPLE_OMP_PARALLEL + || (region->inner->type != GIMPLE_OMP_FOR + && region->inner->type != GIMPLE_OMP_SECTIONS)) + return; + + /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and + WS_EXIT_BB -> PAR_EXIT_BB. 
*/ + par_entry_bb = region->entry; + par_exit_bb = region->exit; + ws_entry_bb = region->inner->entry; + ws_exit_bb = region->inner->exit; + + if (single_succ (par_entry_bb) == ws_entry_bb + && single_succ (ws_exit_bb) == par_exit_bb + && workshare_safe_to_combine_p (ws_entry_bb) + && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) + || (last_and_only_stmt (ws_entry_bb) + && last_and_only_stmt (par_exit_bb)))) + { + gimple *par_stmt = last_stmt (par_entry_bb); + gimple *ws_stmt = last_stmt (ws_entry_bb); + + if (region->inner->type == GIMPLE_OMP_FOR) + { + /* If this is a combined parallel loop, we need to determine + whether or not to use the combined library calls. There + are two cases where we do not apply the transformation: + static loops and any kind of ordered loop. In the first + case, we already open code the loop so there is no need + to do anything else. In the latter case, the combined + parallel loop call would still need extra synchronization + to implement ordered semantics, so there would not be any + gain in using the combined call. */ + tree clauses = gimple_omp_for_clauses (ws_stmt); + tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); + if (c == NULL + || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) + == OMP_CLAUSE_SCHEDULE_STATIC) + || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)) + { + region->is_combined_parallel = false; + region->inner->is_combined_parallel = false; + return; + } + } + + region->is_combined_parallel = true; + region->inner->is_combined_parallel = true; + region->ws_args = get_ws_args_for (par_stmt, ws_stmt); + } +} + +/* Debugging dumps for parallel regions. */ +void dump_omp_region (FILE *, struct omp_region *, int); +void debug_omp_region (struct omp_region *); +void debug_all_omp_regions (void); + +/* Dump the parallel region tree rooted at REGION. */ + +void +dump_omp_region (FILE *file, struct omp_region *region, int indent) +{ + fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, + gimple_code_name[region->type]); + + if (region->inner) + dump_omp_region (file, region->inner, indent + 4); + + if (region->cont) + { + fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", + region->cont->index); + } + + if (region->exit) + fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", + region->exit->index); + else + fprintf (file, "%*s[no exit marker]\n", indent, ""); + + if (region->next) + dump_omp_region (file, region->next, indent); +} + +DEBUG_FUNCTION void +debug_omp_region (struct omp_region *region) +{ + dump_omp_region (stderr, region, 0); +} + +DEBUG_FUNCTION void +debug_all_omp_regions (void) +{ + dump_omp_region (stderr, root_omp_region, 0); +} + +/* Create a new parallel region starting at STMT inside region PARENT. */ + +static struct omp_region * +new_omp_region (basic_block bb, enum gimple_code type, + struct omp_region *parent) +{ + struct omp_region *region = XCNEW (struct omp_region); + + region->outer = parent; + region->entry = bb; + region->type = type; + + if (parent) + { + /* This is a nested region. Add it to the list of inner + regions in PARENT. */ + region->next = parent->inner; + parent->inner = region; + } + else + { + /* This is a toplevel region. Add it to the list of toplevel + regions in ROOT_OMP_REGION. */ + region->next = root_omp_region; + root_omp_region = region; + } + + return region; +} + +/* Release the memory associated with the region tree rooted at REGION. 
*/ + +static void +free_omp_region_1 (struct omp_region *region) +{ + struct omp_region *i, *n; + + for (i = region->inner; i ; i = n) + { + n = i->next; + free_omp_region_1 (i); + } + + free (region); +} + +/* Release the memory for the entire omp region tree. */ + +void +omp_free_regions (void) +{ + struct omp_region *r, *n; + for (r = root_omp_region; r ; r = n) + { + n = r->next; + free_omp_region_1 (r); + } + root_omp_region = NULL; +} + +/* A convenience function to build an empty GIMPLE_COND with just the + condition. */ + +static gcond * +gimple_build_cond_empty (tree cond) +{ + enum tree_code pred_code; + tree lhs, rhs; + + gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); + return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); +} + +/* Return true if a parallel REGION is within a declare target function or + within a target region and is not a part of a gridified target. */ + +static bool +parallel_needs_hsa_kernel_p (struct omp_region *region) +{ + bool indirect = false; + for (region = region->outer; region; region = region->outer) + { + if (region->type == GIMPLE_OMP_PARALLEL) + indirect = true; + else if (region->type == GIMPLE_OMP_TARGET) + { + gomp_target *tgt_stmt + = as_a <gomp_target *> (last_stmt (region->entry)); + + if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + return indirect; + else + return true; + } + } + + if (lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (current_function_decl))) + return true; + + return false; +} + +/* Build the function calls to GOMP_parallel_start etc to actually + generate the parallel operation. REGION is the parallel region + being expanded. BB is the block where to insert the code. WS_ARGS + will be set if this is a call to a combined parallel+workshare + construct, it contains the list of additional arguments needed by + the workshare construct. */ + +static void +expand_parallel_call (struct omp_region *region, basic_block bb, + gomp_parallel *entry_stmt, + vec<tree, va_gc> *ws_args) +{ + tree t, t1, t2, val, cond, c, clauses, flags; + gimple_stmt_iterator gsi; + gimple *stmt; + enum built_in_function start_ix; + int start_ix2; + location_t clause_loc; + vec<tree, va_gc> *args; + + clauses = gimple_omp_parallel_clauses (entry_stmt); + + /* Determine what flavor of GOMP_parallel we will be + emitting. */ + start_ix = BUILT_IN_GOMP_PARALLEL; + if (is_combined_parallel (region)) + { + switch (region->inner->type) + { + case GIMPLE_OMP_FOR: + gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); + switch (region->inner->sched_kind) + { + case OMP_CLAUSE_SCHEDULE_RUNTIME: + start_ix2 = 3; + break; + case OMP_CLAUSE_SCHEDULE_DYNAMIC: + case OMP_CLAUSE_SCHEDULE_GUIDED: + if (region->inner->sched_modifiers + & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) + { + start_ix2 = 3 + region->inner->sched_kind; + break; + } + /* FALLTHRU */ + default: + start_ix2 = region->inner->sched_kind; + break; + } + start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; + start_ix = (enum built_in_function) start_ix2; + break; + case GIMPLE_OMP_SECTIONS: + start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; + break; + default: + gcc_unreachable (); + } + } + + /* By default, the value of NUM_THREADS is zero (selected at run time) + and there is no conditional. 
*/ + cond = NULL_TREE; + val = build_int_cst (unsigned_type_node, 0); + flags = build_int_cst (unsigned_type_node, 0); + + c = omp_find_clause (clauses, OMP_CLAUSE_IF); + if (c) + cond = OMP_CLAUSE_IF_EXPR (c); + + c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS); + if (c) + { + val = OMP_CLAUSE_NUM_THREADS_EXPR (c); + clause_loc = OMP_CLAUSE_LOCATION (c); + } + else + clause_loc = gimple_location (entry_stmt); + + c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND); + if (c) + flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); + + /* Ensure 'val' is of the correct type. */ + val = fold_convert_loc (clause_loc, unsigned_type_node, val); + + /* If we found the clause 'if (cond)', build either + (cond != 0) or (cond ? val : 1u). */ + if (cond) + { + cond = gimple_boolify (cond); + + if (integer_zerop (val)) + val = fold_build2_loc (clause_loc, + EQ_EXPR, unsigned_type_node, cond, + build_int_cst (TREE_TYPE (cond), 0)); + else + { + basic_block cond_bb, then_bb, else_bb; + edge e, e_then, e_else; + tree tmp_then, tmp_else, tmp_join, tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (val)); + if (gimple_in_ssa_p (cfun)) + { + tmp_then = make_ssa_name (tmp_var); + tmp_else = make_ssa_name (tmp_var); + tmp_join = make_ssa_name (tmp_var); + } + else + { + tmp_then = tmp_var; + tmp_else = tmp_var; + tmp_join = tmp_var; + } + + e = split_block_after_labels (bb); + cond_bb = e->src; + bb = e->dest; + remove_edge (e); + + then_bb = create_empty_bb (cond_bb); + else_bb = create_empty_bb (then_bb); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); + + stmt = gimple_build_cond_empty (cond); + gsi = gsi_start_bb (cond_bb); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + expand_omp_build_assign (&gsi, tmp_then, val, true); + + gsi = gsi_start_bb (else_bb); + expand_omp_build_assign (&gsi, tmp_else, + build_int_cst (unsigned_type_node, 1), + true); + + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); + add_bb_to_loop (then_bb, cond_bb->loop_father); + add_bb_to_loop (else_bb, cond_bb->loop_father); + e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); + e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); + + if (gimple_in_ssa_p (cfun)) + { + gphi *phi = create_phi_node (tmp_join, bb); + add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); + add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); + } + + val = tmp_join; + } + + gsi = gsi_start_bb (bb); + val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + } + + gsi = gsi_last_bb (bb); + t = gimple_omp_parallel_data_arg (entry_stmt); + if (t == NULL) + t1 = null_pointer_node; + else + t1 = build_fold_addr_expr (t); + tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); + t2 = build_fold_addr_expr (child_fndecl); + + vec_alloc (args, 4 + vec_safe_length (ws_args)); + args->quick_push (t2); + args->quick_push (t1); + args->quick_push (val); + if (ws_args) + args->splice (*ws_args); + args->quick_push (flags); + + t = build_call_expr_loc_vec (UNKNOWN_LOCATION, + builtin_decl_explicit (start_ix), args); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + if (hsa_gen_requested_p () + && parallel_needs_hsa_kernel_p (region)) + { + cgraph_node *child_cnode = cgraph_node::get (child_fndecl); + hsa_register_kernel (child_cnode); + } +} + +/* Insert a function call whose name is FUNC_NAME with 
the information from + ENTRY_STMT into the basic_block BB. */ + +static void +expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt, + vec <tree, va_gc> *ws_args) +{ + tree t, t1, t2; + gimple_stmt_iterator gsi; + vec <tree, va_gc> *args; + + gcc_assert (vec_safe_length (ws_args) == 2); + tree func_name = (*ws_args)[0]; + tree grain = (*ws_args)[1]; + + tree clauses = gimple_omp_parallel_clauses (entry_stmt); + tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_); + gcc_assert (count != NULL_TREE); + count = OMP_CLAUSE_OPERAND (count, 0); + + gsi = gsi_last_bb (bb); + t = gimple_omp_parallel_data_arg (entry_stmt); + if (t == NULL) + t1 = null_pointer_node; + else + t1 = build_fold_addr_expr (t); + t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); + + vec_alloc (args, 4); + args->quick_push (t2); + args->quick_push (t1); + args->quick_push (count); + args->quick_push (grain); + t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, + GSI_CONTINUE_LINKING); +} + +/* Build the function call to GOMP_task to actually + generate the task operation. BB is the block where to insert the code. */ + +static void +expand_task_call (struct omp_region *region, basic_block bb, + gomp_task *entry_stmt) +{ + tree t1, t2, t3; + gimple_stmt_iterator gsi; + location_t loc = gimple_location (entry_stmt); + + tree clauses = gimple_omp_task_clauses (entry_stmt); + + tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); + tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED); + tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE); + tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); + tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL); + tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY); + + unsigned int iflags + = (untied ? GOMP_TASK_FLAG_UNTIED : 0) + | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) + | (depend ? 
GOMP_TASK_FLAG_DEPEND : 0); + + bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); + tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; + tree num_tasks = NULL_TREE; + bool ull = false; + if (taskloop_p) + { + gimple *g = last_stmt (region->outer->entry); + gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); + struct omp_for_data fd; + omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL); + startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar), + OMP_CLAUSE__LOOPTEMP_); + startvar = OMP_CLAUSE_DECL (startvar); + endvar = OMP_CLAUSE_DECL (endvar); + step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); + if (fd.loop.cond_code == LT_EXPR) + iflags |= GOMP_TASK_FLAG_UP; + tree tclauses = gimple_omp_for_clauses (g); + num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); + if (num_tasks) + num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); + else + { + num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); + if (num_tasks) + { + iflags |= GOMP_TASK_FLAG_GRAINSIZE; + num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); + } + else + num_tasks = integer_zero_node; + } + num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); + if (ifc == NULL_TREE) + iflags |= GOMP_TASK_FLAG_IF; + if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) + iflags |= GOMP_TASK_FLAG_NOGROUP; + ull = fd.iter_type == long_long_unsigned_type_node; + } + else if (priority) + iflags |= GOMP_TASK_FLAG_PRIORITY; + + tree flags = build_int_cst (unsigned_type_node, iflags); + + tree cond = boolean_true_node; + if (ifc) + { + if (taskloop_p) + { + tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); + t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_IF), + build_int_cst (unsigned_type_node, 0)); + flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, + flags, t); + } + else + cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); + } + + if (finalc) + { + tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); + t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, + build_int_cst (unsigned_type_node, + GOMP_TASK_FLAG_FINAL), + build_int_cst (unsigned_type_node, 0)); + flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); + } + if (depend) + depend = OMP_CLAUSE_DECL (depend); + else + depend = build_int_cst (ptr_type_node, 0); + if (priority) + priority = fold_convert (integer_type_node, + OMP_CLAUSE_PRIORITY_EXPR (priority)); + else + priority = integer_zero_node; + + gsi = gsi_last_bb (bb); + tree t = gimple_omp_task_data_arg (entry_stmt); + if (t == NULL) + t2 = null_pointer_node; + else + t2 = build_fold_addr_expr_loc (loc, t); + t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); + t = gimple_omp_task_copy_fn (entry_stmt); + if (t == NULL) + t3 = null_pointer_node; + else + t3 = build_fold_addr_expr_loc (loc, t); + + if (taskloop_p) + t = build_call_expr (ull + ? 
builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) + : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), + 11, t1, t2, t3, + gimple_omp_task_arg_size (entry_stmt), + gimple_omp_task_arg_align (entry_stmt), flags, + num_tasks, priority, startvar, endvar, step); + else + t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), + 9, t1, t2, t3, + gimple_omp_task_arg_size (entry_stmt), + gimple_omp_task_arg_align (entry_stmt), cond, flags, + depend, priority); + + force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); +} + +/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ + +static tree +vec2chain (vec<tree, va_gc> *v) +{ + tree chain = NULL_TREE, t; + unsigned ix; + + FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) + { + DECL_CHAIN (t) = chain; + chain = t; + } + + return chain; +} + +/* Remove barriers in REGION->EXIT's block. Note that this is only + valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region + is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that + left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be + removed. */ + +static void +remove_exit_barrier (struct omp_region *region) +{ + gimple_stmt_iterator gsi; + basic_block exit_bb; + edge_iterator ei; + edge e; + gimple *stmt; + int any_addressable_vars = -1; + + exit_bb = region->exit; + + /* If the parallel region doesn't return, we don't have REGION->EXIT + block at all. */ + if (! exit_bb) + return; + + /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The + workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of + statements that can appear in between are extremely limited -- no + memory operations at all. Here, we allow nothing at all, so the + only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ + gsi = gsi_last_bb (exit_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_prev (&gsi); + if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) + return; + + FOR_EACH_EDGE (e, ei, exit_bb->preds) + { + gsi = gsi_last_bb (e->src); + if (gsi_end_p (gsi)) + continue; + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) == GIMPLE_OMP_RETURN + && !gimple_omp_return_nowait_p (stmt)) + { + /* OpenMP 3.0 tasks unfortunately prevent this optimization + in many cases. If there could be tasks queued, the barrier + might be needed to let the tasks run before some local + variable of the parallel that the task uses as shared + runs out of scope. The task can be spawned either + from within current function (this would be easy to check) + or from some function it calls and gets passed an address + of such a variable. 
*/ + if (any_addressable_vars < 0) + { + gomp_parallel *parallel_stmt + = as_a <gomp_parallel *> (last_stmt (region->entry)); + tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); + tree local_decls, block, decl; + unsigned ix; + + any_addressable_vars = 0; + FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) + if (TREE_ADDRESSABLE (decl)) + { + any_addressable_vars = 1; + break; + } + for (block = gimple_block (stmt); + !any_addressable_vars + && block + && TREE_CODE (block) == BLOCK; + block = BLOCK_SUPERCONTEXT (block)) + { + for (local_decls = BLOCK_VARS (block); + local_decls; + local_decls = DECL_CHAIN (local_decls)) + if (TREE_ADDRESSABLE (local_decls)) + { + any_addressable_vars = 1; + break; + } + if (block == gimple_block (parallel_stmt)) + break; + } + } + if (!any_addressable_vars) + gimple_omp_return_set_nowait (stmt); + } + } +} + +static void +remove_exit_barriers (struct omp_region *region) +{ + if (region->type == GIMPLE_OMP_PARALLEL) + remove_exit_barrier (region); + + if (region->inner) + { + region = region->inner; + remove_exit_barriers (region); + while (region->next) + { + region = region->next; + remove_exit_barriers (region); + } + } +} + +/* Optimize omp_get_thread_num () and omp_get_num_threads () + calls. These can't be declared as const functions, but + within one parallel body they are constant, so they can be + transformed there into __builtin_omp_get_{thread_num,num_threads} () + which are declared const. Similarly for task body, except + that in untied task omp_get_thread_num () can change at any task + scheduling point. */ + +static void +optimize_omp_library_calls (gimple *entry_stmt) +{ + basic_block bb; + gimple_stmt_iterator gsi; + tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); + tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); + bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK + && omp_find_clause (gimple_omp_task_clauses (entry_stmt), + OMP_CLAUSE_UNTIED) != NULL); + + FOR_EACH_BB_FN (bb, cfun) + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *call = gsi_stmt (gsi); + tree decl; + + if (is_gimple_call (call) + && (decl = gimple_call_fndecl (call)) + && DECL_EXTERNAL (decl) + && TREE_PUBLIC (decl) + && DECL_INITIAL (decl) == NULL) + { + tree built_in; + + if (DECL_NAME (decl) == thr_num_id) + { + /* In #pragma omp task untied omp_get_thread_num () can change + during the execution of the task region. */ + if (untied_task) + continue; + built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + } + else if (DECL_NAME (decl) == num_thr_id) + built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + else + continue; + + if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) + || gimple_call_num_args (call) != 0) + continue; + + if (flag_exceptions && !TREE_NOTHROW (decl)) + continue; + + if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE + || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), + TREE_TYPE (TREE_TYPE (built_in)))) + continue; + + gimple_call_set_fndecl (call, built_in); + } + } +} + +/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be + regimplified. */ + +static tree +expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) +{ + tree t = *tp; + + /* Any variable with DECL_VALUE_EXPR needs to be regimplified. 
*/ + if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) + return t; + + if (TREE_CODE (t) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (t); + + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; +} + +/* Prepend or append TO = FROM assignment before or after *GSI_P. */ + +static void +expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, + bool after) +{ + bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); + from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, + !after, after ? GSI_CONTINUE_LINKING + : GSI_SAME_STMT); + gimple *stmt = gimple_build_assign (to, from); + if (after) + gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); + else + gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); + if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) + || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple_regimplify_operands (stmt, &gsi); + } +} + +/* Expand the OpenMP parallel or task directive starting at REGION. */ + +static void +expand_omp_taskreg (struct omp_region *region) +{ + basic_block entry_bb, exit_bb, new_bb; + struct function *child_cfun; + tree child_fn, block, t; + gimple_stmt_iterator gsi; + gimple *entry_stmt, *stmt; + edge e; + vec<tree, va_gc> *ws_args; + + entry_stmt = last_stmt (region->entry); + child_fn = gimple_omp_taskreg_child_fn (entry_stmt); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + + entry_bb = region->entry; + if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) + exit_bb = region->cont; + else + exit_bb = region->exit; + + bool is_cilk_for + = (flag_cilkplus + && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL + && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt), + OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE); + + if (is_cilk_for) + /* If it is a _Cilk_for statement, it is modelled *like* a parallel for, + and the inner statement contains the name of the built-in function + and grain. */ + ws_args = region->inner->ws_args; + else if (is_combined_parallel (region)) + ws_args = region->ws_args; + else + ws_args = NULL; + + if (child_cfun->cfg) + { + /* Due to inlining, it may happen that we have already outlined + the region, in which case all we need to do is make the + sub-graph unreachable and emit the parallel call. */ + edge entry_succ_e, exit_succ_e; + + entry_succ_e = single_succ_edge (entry_bb); + + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL + || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); + gsi_remove (&gsi, true); + + new_bb = entry_bb; + if (exit_bb) + { + exit_succ_e = single_succ_edge (exit_bb); + make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); + } + remove_edge_and_dominated_blocks (entry_succ_e); + } + else + { + unsigned srcidx, dstidx, num; + + /* If the parallel region needs data sent from the parent + function, then the very first statement (except possible + tree profile counter updates) of the parallel body + is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since + &.OMP_DATA_O is passed as an argument to the child function, + we need to replace it with the argument as seen by the child + function. + + In most cases, this will end up being the identity assignment + .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had + a function call that has been inlined, the original PARM_DECL + .OMP_DATA_I may have been converted into a different local + variable. In which case, we need to keep the assignment. 
*/ + if (gimple_omp_taskreg_data_arg (entry_stmt)) + { + basic_block entry_succ_bb + = single_succ_p (entry_bb) ? single_succ (entry_bb) + : FALLTHRU_EDGE (entry_bb)->dest; + tree arg; + gimple *parcopy_stmt = NULL; + + for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) + { + gimple *stmt; + + gcc_assert (!gsi_end_p (gsi)); + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + + if (gimple_num_ops (stmt) == 2) + { + tree arg = gimple_assign_rhs1 (stmt); + + /* We're ignore the subcode because we're + effectively doing a STRIP_NOPS. */ + + if (TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) + == gimple_omp_taskreg_data_arg (entry_stmt)) + { + parcopy_stmt = stmt; + break; + } + } + } + + gcc_assert (parcopy_stmt != NULL); + arg = DECL_ARGUMENTS (child_fn); + + if (!gimple_in_ssa_p (cfun)) + { + if (gimple_assign_lhs (parcopy_stmt) == arg) + gsi_remove (&gsi, true); + else + { + /* ?? Is setting the subcode really necessary ?? */ + gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); + gimple_assign_set_rhs1 (parcopy_stmt, arg); + } + } + else + { + tree lhs = gimple_assign_lhs (parcopy_stmt); + gcc_assert (SSA_NAME_VAR (lhs) == arg); + /* We'd like to set the rhs to the default def in the child_fn, + but it's too early to create ssa names in the child_fn. + Instead, we set the rhs to the parm. In + move_sese_region_to_fn, we introduce a default def for the + parm, map the parm to it's default def, and once we encounter + this stmt, replace the parm with the default def. */ + gimple_assign_set_rhs1 (parcopy_stmt, arg); + update_stmt (parcopy_stmt); + } + } + + /* Declare local variables needed in CHILD_CFUN. */ + block = DECL_INITIAL (child_fn); + BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); + /* The gimplifier could record temporaries in parallel/task block + rather than in containing function's local_decls chain, + which would mean cgraph missed finalizing them. Do it now. */ + for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) + if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) + varpool_node::finalize_decl (t); + DECL_SAVED_TREE (child_fn) = NULL; + /* We'll create a CFG for child_fn, so no gimple body is needed. */ + gimple_set_body (child_fn, NULL); + TREE_USED (block) = 1; + + /* Reset DECL_CONTEXT on function arguments. */ + for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, + so that it can be moved to the child function. */ + gsi = gsi_last_bb (entry_bb); + stmt = gsi_stmt (gsi); + gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL + || gimple_code (stmt) == GIMPLE_OMP_TASK)); + e = split_block (entry_bb, stmt); + gsi_remove (&gsi, true); + entry_bb = e->dest; + edge e2 = NULL; + if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + else + { + e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); + gcc_assert (e2->dest == region->exit); + remove_edge (BRANCH_EDGE (entry_bb)); + set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); + gsi = gsi_last_bb (region->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_remove (&gsi, true); + } + + /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ + if (exit_bb) + { + gsi = gsi_last_bb (exit_bb); + gcc_assert (!gsi_end_p (gsi) + && (gimple_code (gsi_stmt (gsi)) + == (e2 ? 
GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); + stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + } + + /* Move the parallel region into CHILD_CFUN. */ + + if (gimple_in_ssa_p (cfun)) + { + init_tree_ssa (child_cfun); + init_ssa_operands (child_cfun); + child_cfun->gimple_df->in_ssa_p = true; + block = NULL_TREE; + } + else + block = gimple_block (entry_stmt); + + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + if (e2) + { + basic_block dest_bb = e2->dest; + if (!exit_bb) + make_edge (new_bb, dest_bb, EDGE_FALLTHRU); + remove_edge (e2); + set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); + } + /* When the OMP expansion process cannot guarantee an up-to-date + loop tree arrange for the child function to fixup loops. */ + if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; + + /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ + num = vec_safe_length (child_cfun->local_decls); + for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) + { + t = (*child_cfun->local_decls)[srcidx]; + if (DECL_CONTEXT (t) == cfun->decl) + continue; + if (srcidx != dstidx) + (*child_cfun->local_decls)[dstidx] = t; + dstidx++; + } + if (dstidx != num) + vec_safe_truncate (child_cfun->local_decls, dstidx); + + /* Inform the callgraph about the new function. */ + child_cfun->curr_properties = cfun->curr_properties; + child_cfun->has_simduid_loops |= cfun->has_simduid_loops; + child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; + cgraph_node *node = cgraph_node::get_create (child_fn); + node->parallelized_function = 1; + cgraph_node::add_new_function (child_fn, true); + + bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) + && !DECL_ASSEMBLER_NAME_SET_P (child_fn); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (need_asm) + assign_assembler_name_if_neeeded (child_fn); + + if (optimize) + optimize_omp_library_calls (entry_stmt); + cgraph_edge::rebuild_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. */ + if (flag_exceptions) + { + basic_block bb; + bool changed = false; + + FOR_EACH_BB_FN (bb, cfun) + changed |= gimple_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + } + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa); + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + pop_cfun (); + + if (dump_file && !gimple_in_ssa_p (cfun)) + { + omp_any_child_fn_dumped = true; + dump_function_header (dump_file, child_fn, dump_flags); + dump_function_to_file (child_fn, dump_file, dump_flags); + } + } + + /* Emit a library call to launch the children threads. */ + if (is_cilk_for) + expand_cilk_for_call (new_bb, + as_a <gomp_parallel *> (entry_stmt), ws_args); + else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) + expand_parallel_call (region, new_bb, + as_a <gomp_parallel *> (entry_stmt), ws_args); + else + expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_only_virtuals); +} + +/* Information about members of an OpenACC collapsed loop nest. 
*/ + +struct oacc_collapse +{ + tree base; /* Base value. */ + tree iters; /* Number of steps. */ + tree step; /* step size. */ +}; + +/* Helper for expand_oacc_for. Determine collapsed loop information. + Fill in COUNTS array. Emit any initialization code before GSI. + Return the calculated outer loop bound of BOUND_TYPE. */ + +static tree +expand_oacc_collapse_init (const struct omp_for_data *fd, + gimple_stmt_iterator *gsi, + oacc_collapse *counts, tree bound_type) +{ + tree total = build_int_cst (bound_type, 1); + int ix; + + gcc_assert (integer_onep (fd->loop.step)); + gcc_assert (integer_zerop (fd->loop.n1)); + + for (ix = 0; ix != fd->collapse; ix++) + { + const omp_for_data_loop *loop = &fd->loops[ix]; + + tree iter_type = TREE_TYPE (loop->v); + tree diff_type = iter_type; + tree plus_type = iter_type; + + gcc_assert (loop->cond_code == fd->loop.cond_code); + + if (POINTER_TYPE_P (iter_type)) + plus_type = sizetype; + if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) + diff_type = signed_type_for (diff_type); + + tree b = loop->n1; + tree e = loop->n2; + tree s = loop->step; + bool up = loop->cond_code == LT_EXPR; + tree dir = build_int_cst (diff_type, up ? +1 : -1); + bool negating; + tree expr; + + b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, + true, GSI_SAME_STMT); + e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Convert the step, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); + if (negating) + s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); + s = fold_convert (diff_type, s); + if (negating) + s = fold_build1 (NEGATE_EXPR, diff_type, s); + s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Determine the range, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (iter_type); + expr = fold_build2 (MINUS_EXPR, plus_type, + fold_convert (plus_type, negating ? b : e), + fold_convert (plus_type, negating ? e : b)); + expr = fold_convert (diff_type, expr); + if (negating) + expr = fold_build1 (NEGATE_EXPR, diff_type, expr); + tree range = force_gimple_operand_gsi + (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); + + /* Determine number of iterations. */ + expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); + expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); + expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); + + tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, + true, GSI_SAME_STMT); + + counts[ix].base = b; + counts[ix].iters = iters; + counts[ix].step = s; + + total = fold_build2 (MULT_EXPR, bound_type, total, + fold_convert (bound_type, iters)); + } + + return total; +} + +/* Emit initializers for collapsed loop members. IVAR is the outer + loop iteration variable, from which collapsed loop iteration values + are calculated. COUNTS array has been initialized by + expand_oacc_collapse_inits. */ + +static void +expand_oacc_collapse_vars (const struct omp_for_data *fd, + gimple_stmt_iterator *gsi, + const oacc_collapse *counts, tree ivar) +{ + tree ivar_type = TREE_TYPE (ivar); + + /* The most rapidly changing iteration variable is the innermost + one. 
*/ + for (int ix = fd->collapse; ix--;) + { + const omp_for_data_loop *loop = &fd->loops[ix]; + const oacc_collapse *collapse = &counts[ix]; + tree iter_type = TREE_TYPE (loop->v); + tree diff_type = TREE_TYPE (collapse->step); + tree plus_type = iter_type; + enum tree_code plus_code = PLUS_EXPR; + tree expr; + + if (POINTER_TYPE_P (iter_type)) + { + plus_code = POINTER_PLUS_EXPR; + plus_type = sizetype; + } + + expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar, + fold_convert (ivar_type, collapse->iters)); + expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), + collapse->step); + expr = fold_build2 (plus_code, iter_type, collapse->base, + fold_convert (plus_type, expr)); + expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + gassign *ass = gimple_build_assign (loop->v, expr); + gsi_insert_before (gsi, ass, GSI_SAME_STMT); + + if (ix) + { + expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar, + fold_convert (ivar_type, collapse->iters)); + ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, + true, GSI_SAME_STMT); + } + } +} + +/* Helper function for expand_omp_{for_*,simd}. If this is the outermost + of the combined collapse > 1 loop constructs, generate code like: + if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: + count = 0; + and set ZERO_ITER_BB to that bb. If this isn't the outermost + of the combined loop constructs, just initialize COUNTS array + from the _looptemp_ clauses. */ + +/* NOTE: It *could* be better to moosh all of the BBs together, + creating one larger BB with all the computation and the unexpected + jump at the end. I.e. + + bool zero3, zero2, zero1, zero; + + zero3 = N32 c3 N31; + count3 = (N32 - N31) /[cl] STEP3; + zero2 = N22 c2 N21; + count2 = (N22 - N21) /[cl] STEP2; + zero1 = N12 c1 N11; + count1 = (N12 - N11) /[cl] STEP1; + zero = zero3 || zero2 || zero1; + count = count1 * count2 * count3; + if (__builtin_expect(zero, false)) goto zero_iter_bb; + + After all, we expect the zero=false, and thus we expect to have to + evaluate all of the comparison expressions, so short-circuiting + oughtn't be a win. Since the condition isn't protecting a + denominator, we're not concerned about divide-by-zero, so we can + fully evaluate count even if a numerator turned out to be wrong. + + It seems like putting this all together would create much better + scheduling opportunities, and less pressure on the chip's branch + predictor. */ + +static void +expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, + basic_block &entry_bb, tree *counts, + basic_block &zero_iter1_bb, int &first_zero_iter1, + basic_block &zero_iter2_bb, int &first_zero_iter2, + basic_block &l2_dom_bb) +{ + tree t, type = TREE_TYPE (fd->loop.v); + edge e, ne; + int i; + + /* Collapsed loops need work for expansion into SSA form. 
*/ + gcc_assert (!gimple_in_ssa_p (cfun)); + + if (gimple_omp_for_combined_into_p (fd->for_stmt) + && TREE_CODE (fd->loop.n2) != INTEGER_CST) + { + gcc_assert (fd->ordered == 0); + /* First two _looptemp_ clauses are for istart/iend, counts[0] + isn't supposed to be handled, as the inner loop doesn't + use it. */ + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + for (i = 0; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + if (i) + counts[i] = OMP_CLAUSE_DECL (innerc); + else + counts[0] = NULL_TREE; + } + return; + } + + for (i = fd->collapse; i < fd->ordered; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + counts[i] = NULL_TREE; + t = fold_binary (fd->loops[i].cond_code, boolean_type_node, + fold_convert (itype, fd->loops[i].n1), + fold_convert (itype, fd->loops[i].n2)); + if (t && integer_zerop (t)) + { + for (i = fd->collapse; i < fd->ordered; i++) + counts[i] = build_int_cst (type, 0); + break; + } + } + for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + + if (i >= fd->collapse && counts[i]) + continue; + if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) + && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, + fold_convert (itype, fd->loops[i].n1), + fold_convert (itype, fd->loops[i].n2))) + == NULL_TREE || !integer_onep (t))) + { + gcond *cond_stmt; + tree n1, n2; + n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); + n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); + n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + *gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, gsi); + } + e = split_block (entry_bb, cond_stmt); + basic_block &zero_iter_bb + = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; + int &first_zero_iter + = i < fd->collapse ? first_zero_iter1 : first_zero_iter2; + if (zero_iter_bb == NULL) + { + gassign *assign_stmt; + first_zero_iter = i; + zero_iter_bb = create_empty_bb (entry_bb); + add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); + *gsi = gsi_after_labels (zero_iter_bb); + if (i < fd->collapse) + assign_stmt = gimple_build_assign (fd->loop.n2, + build_zero_cst (type)); + else + { + counts[i] = create_tmp_reg (type, ".count"); + assign_stmt + = gimple_build_assign (counts[i], build_zero_cst (type)); + } + gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); + set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, + entry_bb); + } + ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); + ne->probability = REG_BR_PROB_BASE / 2000 - 1; + e->flags = EDGE_TRUE_VALUE; + e->probability = REG_BR_PROB_BASE - ne->probability; + if (l2_dom_bb == NULL) + l2_dom_bb = entry_bb; + entry_bb = e->dest; + *gsi = gsi_last_bb (entry_bb); + } + + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR + ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, + fold_convert (itype, fd->loops[i].step), t); + t = fold_build2 (PLUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n2)); + t = fold_build2 (MINUS_EXPR, itype, t, + fold_convert (itype, fd->loops[i].n1)); + /* ?? We could probably use CEIL_DIV_EXPR instead of + TRUNC_DIV_EXPR and adjusting by hand. Unless we can't + generate the same code in the end because generically we + don't know that the values involved must be negative for + GT?? */ + if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, + fold_convert (itype, + fd->loops[i].step))); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, + fold_convert (itype, fd->loops[i].step)); + t = fold_convert (type, t); + if (TREE_CODE (t) == INTEGER_CST) + counts[i] = t; + else + { + if (i < fd->collapse || i != first_zero_iter2) + counts[i] = create_tmp_reg (type, ".count"); + expand_omp_build_assign (gsi, counts[i], t); + } + if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) + { + if (i == 0) + t = counts[0]; + else + t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); + expand_omp_build_assign (gsi, fd->loop.n2, t); + } + } +} + +/* Helper function for expand_omp_{for_*,simd}. Generate code like: + T = V; + V3 = N31 + (T % count3) * STEP3; + T = T / count3; + V2 = N21 + (T % count2) * STEP2; + T = T / count2; + V1 = N11 + T * STEP1; + if this loop doesn't have an inner loop construct combined with it. + If it does have an inner loop construct combined with it and the + iteration count isn't known constant, store values from counts array + into its _looptemp_ temporaries instead. */ + +static void +expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, + tree *counts, gimple *inner_stmt, tree startvar) +{ + int i; + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + /* If fd->loop.n2 is constant, then no propagation of the counts + is needed, they are constant. */ + if (TREE_CODE (fd->loop.n2) == INTEGER_CST) + return; + + tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR + ? gimple_omp_taskreg_clauses (inner_stmt) + : gimple_omp_for_clauses (inner_stmt); + /* First two _looptemp_ clauses are for istart/iend, counts[0] + isn't supposed to be handled, as the inner loop doesn't + use it. 
*/ + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + for (i = 0; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + if (i) + { + tree tem = OMP_CLAUSE_DECL (innerc); + tree t = fold_convert (TREE_TYPE (tem), counts[i]); + t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, + false, GSI_CONTINUE_LINKING); + gassign *stmt = gimple_build_assign (tem, t); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + } + } + return; + } + + tree type = TREE_TYPE (fd->loop.v); + tree tem = create_tmp_reg (type, ".tem"); + gassign *stmt = gimple_build_assign (tem, startvar); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + + for (i = fd->collapse - 1; i >= 0; i--) + { + tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; + itype = vtype; + if (POINTER_TYPE_P (vtype)) + itype = signed_type_for (vtype); + if (i != 0) + t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); + else + t = tem; + t = fold_convert (itype, t); + t = fold_build2 (MULT_EXPR, itype, t, + fold_convert (itype, fd->loops[i].step)); + if (POINTER_TYPE_P (vtype)) + t = fold_build_pointer_plus (fd->loops[i].n1, t); + else + t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); + t = force_gimple_operand_gsi (gsi, t, + DECL_P (fd->loops[i].v) + && TREE_ADDRESSABLE (fd->loops[i].v), + NULL_TREE, false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (fd->loops[i].v, t); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + if (i != 0) + { + t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); + t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, + false, GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (tem, t); + gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); + } + } +} + +/* Helper function for expand_omp_for_*. 
Generate code like: + L10: + V3 += STEP3; + if (V3 cond3 N32) goto BODY_BB; else goto L11; + L11: + V3 = N31; + V2 += STEP2; + if (V2 cond2 N22) goto BODY_BB; else goto L12; + L12: + V2 = N21; + V1 += STEP1; + goto BODY_BB; */ + +static basic_block +extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, + basic_block body_bb) +{ + basic_block last_bb, bb, collapse_bb = NULL; + int i; + gimple_stmt_iterator gsi; + edge e; + tree t; + gimple *stmt; + + last_bb = cont_bb; + for (i = fd->collapse - 1; i >= 0; i--) + { + tree vtype = TREE_TYPE (fd->loops[i].v); + + bb = create_empty_bb (last_bb); + add_bb_to_loop (bb, last_bb->loop_father); + gsi = gsi_start_bb (bb); + + if (i < fd->collapse - 1) + { + e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); + e->probability = REG_BR_PROB_BASE / 8; + + t = fd->loops[i + 1].n1; + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (fd->loops[i + 1].v) + && TREE_ADDRESSABLE (fd->loops[i + + 1].v), + NULL_TREE, false, + GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (fd->loops[i + 1].v, t); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + } + else + collapse_bb = bb; + + set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); + + if (POINTER_TYPE_P (vtype)) + t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); + else + t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (fd->loops[i].v) + && TREE_ADDRESSABLE (fd->loops[i].v), + NULL_TREE, false, GSI_CONTINUE_LINKING); + stmt = gimple_build_assign (fd->loops[i].v, t); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + if (i > 0) + { + t = fd->loops[i].n2; + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loops[i].v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); + stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); + e->probability = REG_BR_PROB_BASE * 7 / 8; + } + else + make_edge (bb, body_bb, EDGE_FALLTHRU); + last_bb = bb; + } + + return collapse_bb; +} + +/* Expand #pragma omp ordered depend(source). */ + +static void +expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, + tree *counts, location_t loc) +{ + enum built_in_function source_ix + = fd->iter_type == long_integer_type_node + ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; + gimple *g + = gimple_build_call (builtin_decl_explicit (source_ix), 1, + build_fold_addr_expr (counts[fd->ordered])); + gimple_set_location (g, loc); + gsi_insert_before (gsi, g, GSI_SAME_STMT); +} + +/* Expand a single depend from #pragma omp ordered depend(sink:...). */ + +static void +expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, + tree *counts, tree c, location_t loc) +{ + auto_vec<tree, 10> args; + enum built_in_function sink_ix + = fd->iter_type == long_integer_type_node + ? 
BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; + tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; + int i; + gimple_stmt_iterator gsi2 = *gsi; + bool warned_step = false; + + for (i = 0; i < fd->ordered; i++) + { + tree step = NULL_TREE; + off = TREE_PURPOSE (deps); + if (TREE_CODE (off) == TRUNC_DIV_EXPR) + { + step = TREE_OPERAND (off, 1); + off = TREE_OPERAND (off, 0); + } + if (!integer_zerop (off)) + { + gcc_assert (fd->loops[i].cond_code == LT_EXPR + || fd->loops[i].cond_code == GT_EXPR); + bool forward = fd->loops[i].cond_code == LT_EXPR; + if (step) + { + /* Non-simple Fortran DO loops. If step is variable, + we don't know at compile even the direction, so can't + warn. */ + if (TREE_CODE (step) != INTEGER_CST) + break; + forward = tree_int_cst_sgn (step) != -1; + } + if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + warning_at (loc, 0, "%<depend(sink)%> clause waiting for " + "lexically later iteration"); + break; + } + deps = TREE_CHAIN (deps); + } + /* If all offsets corresponding to the collapsed loops are zero, + this depend clause can be ignored. FIXME: but there is still a + flush needed. We need to emit one __sync_synchronize () for it + though (perhaps conditionally)? Solve this together with the + conservative dependence folding optimization. + if (i >= fd->collapse) + return; */ + + deps = OMP_CLAUSE_DECL (c); + gsi_prev (&gsi2); + edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); + edge e2 = split_block_after_labels (e1->dest); + + gsi2 = gsi_after_labels (e1->dest); + *gsi = gsi_last_bb (e1->src); + for (i = 0; i < fd->ordered; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + tree step = NULL_TREE; + tree orig_off = NULL_TREE; + if (POINTER_TYPE_P (itype)) + itype = sizetype; + if (i) + deps = TREE_CHAIN (deps); + off = TREE_PURPOSE (deps); + if (TREE_CODE (off) == TRUNC_DIV_EXPR) + { + step = TREE_OPERAND (off, 1); + off = TREE_OPERAND (off, 0); + gcc_assert (fd->loops[i].cond_code == LT_EXPR + && integer_onep (fd->loops[i].step) + && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); + } + tree s = fold_convert_loc (loc, itype, step ? 
step : fd->loops[i].step); + if (step) + { + off = fold_convert_loc (loc, itype, off); + orig_off = off; + off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); + } + + if (integer_zerop (off)) + t = boolean_true_node; + else + { + tree a; + tree co = fold_convert_loc (loc, itype, off); + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) + { + if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); + a = fold_build2_loc (loc, POINTER_PLUS_EXPR, + TREE_TYPE (fd->loops[i].v), fd->loops[i].v, + co); + } + else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, co); + else + a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, co); + if (step) + { + tree t1, t2; + if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, + fd->loops[i].n1); + else + t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, + fd->loops[i].n2); + if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, + fd->loops[i].n2); + else + t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, + fd->loops[i].n1); + t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, + step, build_int_cst (TREE_TYPE (step), 0)); + if (TREE_CODE (step) != INTEGER_CST) + { + t1 = unshare_expr (t1); + t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t2 = unshare_expr (t2); + t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + } + t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, + t, t2, t1); + } + else if (fd->loops[i].cond_code == LT_EXPR) + { + if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, + fd->loops[i].n1); + else + t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, + fd->loops[i].n2); + } + else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, + fd->loops[i].n2); + else + t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, + fd->loops[i].n1); + } + if (cond) + cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); + else + cond = t; + + off = fold_convert_loc (loc, itype, off); + + if (step + || (fd->loops[i].cond_code == LT_EXPR + ? !integer_onep (fd->loops[i].step) + : !integer_minus_onep (fd->loops[i].step))) + { + if (step == NULL_TREE + && TYPE_UNSIGNED (itype) + && fd->loops[i].cond_code == GT_EXPR) + t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, + fold_build1_loc (loc, NEGATE_EXPR, itype, + s)); + else + t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, + orig_off ? orig_off : off, s); + t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, + build_int_cst (itype, 0)); + if (integer_zerop (t) && !warned_step) + { + warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " + "in the iteration space"); + warned_step = true; + } + cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, + cond, t); + } + + if (i <= fd->collapse - 1 && fd->collapse > 1) + t = fd->loop.v; + else if (counts[i]) + t = counts[i]; + else + { + t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, fd->loops[i].n1); + t = fold_convert_loc (loc, fd->iter_type, t); + } + if (step) + /* We have divided off by step already earlier. 
*/; + else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) + off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, + fold_build1_loc (loc, NEGATE_EXPR, itype, + s)); + else + off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); + if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) + off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); + off = fold_convert_loc (loc, fd->iter_type, off); + if (i <= fd->collapse - 1 && fd->collapse > 1) + { + if (i) + off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, + off); + if (i < fd->collapse - 1) + { + coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, + counts[i]); + continue; + } + } + off = unshare_expr (off); + t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); + t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, + true, GSI_SAME_STMT); + args.safe_push (t); + } + gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); + gimple_set_location (g, loc); + gsi_insert_before (&gsi2, g, GSI_SAME_STMT); + + cond = unshare_expr (cond); + cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, + GSI_CONTINUE_LINKING); + gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); + edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); + e3->probability = REG_BR_PROB_BASE / 8; + e1->probability = REG_BR_PROB_BASE - e3->probability; + e1->flags = EDGE_TRUE_VALUE; + set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); + + *gsi = gsi_after_labels (e2->dest); +} + +/* Expand all #pragma omp ordered depend(source) and + #pragma omp ordered depend(sink:...) constructs in the current + #pragma omp for ordered(n) region. */ + +static void +expand_omp_ordered_source_sink (struct omp_region *region, + struct omp_for_data *fd, tree *counts, + basic_block cont_bb) +{ + struct omp_region *inner; + int i; + for (i = fd->collapse - 1; i < fd->ordered; i++) + if (i == fd->collapse - 1 && fd->collapse > 1) + counts[i] = NULL_TREE; + else if (i >= fd->collapse && !cont_bb) + counts[i] = build_zero_cst (fd->iter_type); + else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) + && integer_onep (fd->loops[i].step)) + counts[i] = NULL_TREE; + else + counts[i] = create_tmp_var (fd->iter_type, ".orditer"); + tree atype + = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); + counts[fd->ordered] = create_tmp_var (atype, ".orditera"); + TREE_ADDRESSABLE (counts[fd->ordered]) = 1; + + for (inner = region->inner; inner; inner = inner->next) + if (inner->type == GIMPLE_OMP_ORDERED) + { + gomp_ordered *ord_stmt = inner->ord_stmt; + gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); + location_t loc = gimple_location (ord_stmt); + tree c; + for (c = gimple_omp_ordered_clauses (ord_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) + break; + if (c) + expand_omp_ordered_source (&gsi, fd, counts, loc); + for (c = gimple_omp_ordered_clauses (ord_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) + expand_omp_ordered_sink (&gsi, fd, counts, c, loc); + gsi_remove (&gsi, true); + } +} + +/* Wrap the body into fd->ordered - fd->collapse loops that aren't + collapsed. 
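+   E.g. given
+
+     #pragma omp for ordered(2)
+     for (i = 0; i < N; i++)
+       for (j = 0; j < M; j++)
+         BODY;
+
+   only the i loop is divided among the threads, so the j loop has to be
+   recreated here around BODY; an element of the .orditera array is kept
+   up to date with the current j iteration, since the depend(source)
+   expansion above posts the address of that array to the runtime.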
*/ + +static basic_block +expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, + basic_block cont_bb, basic_block body_bb, + bool ordered_lastprivate) +{ + if (fd->ordered == fd->collapse) + return cont_bb; + + if (!cont_bb) + { + gimple_stmt_iterator gsi = gsi_after_labels (body_bb); + for (int i = fd->collapse; i < fd->ordered; i++) + { + tree type = TREE_TYPE (fd->loops[i].v); + tree n1 = fold_convert (type, fd->loops[i].n1); + expand_omp_build_assign (&gsi, fd->loops[i].v, n1); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_int (i - fd->collapse + 1), + NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); + } + return NULL; + } + + for (int i = fd->ordered - 1; i >= fd->collapse; i--) + { + tree t, type = TREE_TYPE (fd->loops[i].v); + gimple_stmt_iterator gsi = gsi_after_labels (body_bb); + expand_omp_build_assign (&gsi, fd->loops[i].v, + fold_convert (type, fd->loops[i].n1)); + if (counts[i]) + expand_omp_build_assign (&gsi, counts[i], + build_zero_cst (fd->iter_type)); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_int (i - fd->collapse + 1), + NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); + if (!gsi_end_p (gsi)) + gsi_prev (&gsi); + else + gsi = gsi_last_bb (body_bb); + edge e1 = split_block (body_bb, gsi_stmt (gsi)); + basic_block new_body = e1->dest; + if (body_bb == cont_bb) + cont_bb = new_body; + edge e2 = NULL; + basic_block new_header; + if (EDGE_COUNT (cont_bb->preds) > 0) + { + gsi = gsi_last_bb (cont_bb); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loops[i].v, + fold_convert (sizetype, + fd->loops[i].step)); + else + t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, + fold_convert (type, fd->loops[i].step)); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + if (counts[i]) + { + t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], + build_int_cst (fd->iter_type, 1)); + expand_omp_build_assign (&gsi, counts[i], t); + t = counts[i]; + } + else + { + t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, fd->loops[i].n1); + t = fold_convert (fd->iter_type, t); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + } + aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_int (i - fd->collapse + 1), + NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, t); + gsi_prev (&gsi); + e2 = split_block (cont_bb, gsi_stmt (gsi)); + new_header = e2->dest; + } + else + new_header = cont_bb; + gsi = gsi_after_labels (new_header); + tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, + true, GSI_SAME_STMT); + tree n2 + = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), + true, NULL_TREE, true, GSI_SAME_STMT); + t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); + edge e3 = split_block (new_header, gsi_stmt (gsi)); + cont_bb = e3->dest; + remove_edge (e1); + make_edge (body_bb, new_header, EDGE_FALLTHRU); + e3->flags = EDGE_FALSE_VALUE; + e3->probability = REG_BR_PROB_BASE / 8; + e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); + e1->probability = REG_BR_PROB_BASE - e3->probability; + + set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); + set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); + + if (e2) + { + struct loop *loop = alloc_loop (); + loop->header = new_header; + 
loop->latch = e2->src; + add_loop (loop, body_bb->loop_father); + } + } + + /* If there are any lastprivate clauses and it is possible some loops + might have zero iterations, ensure all the decls are initialized, + otherwise we could crash evaluating C++ class iterators with lastprivate + clauses. */ + bool need_inits = false; + for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) + if (need_inits) + { + tree type = TREE_TYPE (fd->loops[i].v); + gimple_stmt_iterator gsi = gsi_after_labels (body_bb); + expand_omp_build_assign (&gsi, fd->loops[i].v, + fold_convert (type, fd->loops[i].n1)); + } + else + { + tree type = TREE_TYPE (fd->loops[i].v); + tree this_cond = fold_build2 (fd->loops[i].cond_code, + boolean_type_node, + fold_convert (type, fd->loops[i].n1), + fold_convert (type, fd->loops[i].n2)); + if (!integer_onep (this_cond)) + need_inits = true; + } + + return cont_bb; +} + +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with any schedule. Given parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); + if (more) goto L0; else goto L3; + L0: + V = istart0; + iend = iend0; + L1: + BODY; + V += STEP; + if (V cond iend) goto L1; else goto L2; + L2: + if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; + L3: + + If this is a combined omp parallel loop, instead of the call to + GOMP_loop_foo_start, we call GOMP_loop_foo_next. + If this is gimple_omp_for_combined_p loop, then instead of assigning + V and iend in L0 we assign the first two _looptemp_ clause decls of the + inner GIMPLE_OMP_FOR and V += STEP; and + if (V cond iend) goto L1; else goto L2; are removed. + + For collapsed loops, given parameters: + collapse(3) + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + BODY; + + we generate pseudocode + + if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + goto Z1; + Z0: + count = 0; + Z1: + more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); + if (more) goto L0; else goto L3; + L0: + V = istart0; + T = V; + V3 = N31 + (T % count3) * STEP3; + T = T / count3; + V2 = N21 + (T % count2) * STEP2; + T = T / count2; + V1 = N11 + T * STEP1; + iend = iend0; + L1: + BODY; + V += 1; + if (V < iend) goto L10; else goto L2; + L10: + V3 += STEP3; + if (V3 cond3 N32) goto L1; else goto L11; + L11: + V3 = N31; + V2 += STEP2; + if (V2 cond2 N22) goto L1; else goto L12; + L12: + V2 = N21; + V1 += STEP1; + goto L1; + L2: + if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; + L3: + + */ + +static void +expand_omp_for_generic (struct omp_region *region, + struct omp_for_data *fd, + enum built_in_function start_fn, + enum built_in_function next_fn, + gimple *inner_stmt) +{ + tree type, istart0, iend0, iend; + tree t, vmain, vback, bias = NULL_TREE; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; + basic_block l2_bb = NULL, l3_bb = NULL; + gimple_stmt_iterator gsi; 
+ gassign *assign_stmt; + bool in_combined_parallel = is_combined_parallel (region); + bool broken_loop = region->cont == NULL; + edge e, ne; + tree *counts = NULL; + int i; + bool ordered_lastprivate = false; + + gcc_assert (!broken_loop || !in_combined_parallel); + gcc_assert (fd->iter_type == long_integer_type_node + || !in_combined_parallel); + + entry_bb = region->entry; + cont_bb = region->cont; + collapse_bb = NULL; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + l1_bb = single_succ (l0_bb); + if (!broken_loop) + { + l2_bb = create_empty_bb (cont_bb); + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb + || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest + == l1_bb)); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + else + l2_bb = NULL; + l3_bb = BRANCH_EDGE (entry_bb)->dest; + exit_bb = region->exit; + + gsi = gsi_last_bb (entry_bb); + + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + if (fd->ordered + && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), + OMP_CLAUSE_LASTPRIVATE)) + ordered_lastprivate = false; + if (fd->collapse > 1 || fd->ordered) + { + int first_zero_iter1 = -1, first_zero_iter2 = -1; + basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; + + counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter1_bb, first_zero_iter1, + zero_iter2_bb, first_zero_iter2, l2_dom_bb); + + if (zero_iter1_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. */ + for (i = first_zero_iter1; + i < (fd->ordered ? fd->ordered : fd->collapse); i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + gsi_prev (&gsi); + e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator (CDI_DOMINATORS, + zero_iter1_bb)); + } + if (zero_iter2_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. */ + for (i = first_zero_iter2; i < fd->ordered; i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + if (zero_iter1_bb) + make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); + else + { + gsi_prev (&gsi); + e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator + (CDI_DOMINATORS, zero_iter2_bb)); + } + } + if (fd->collapse == 1) + { + counts[0] = fd->loop.n2; + fd->loop = fd->loops[0]; + } + } + + type = TREE_TYPE (fd->loop.v); + istart0 = create_tmp_var (fd->iter_type, ".istart0"); + iend0 = create_tmp_var (fd->iter_type, ".iend0"); + TREE_ADDRESSABLE (istart0) = 1; + TREE_ADDRESSABLE (iend0) = 1; + + /* See if we need to bias by LLONG_MIN. 
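+   The GOMP_loop_ull_* runtime functions work on unsigned long long
+   iteration bounds.  If V has a signed type whose range may cross zero,
+   N1 and N2 are biased by TYPE_MIN_VALUE of that type (i.e. 2^63 is
+   added for a 64-bit V, which maps the signed order onto the unsigned
+   order) before being passed to the runtime, and the bias is subtracted
+   again below when ISTART0/IEND0 are copied back into V and IEND.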
*/ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type) + && fd->ordered == 0) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + + gimple_stmt_iterator gsif = gsi; + gsi_prev (&gsif); + + tree arr = NULL_TREE; + if (in_combined_parallel) + { + gcc_assert (fd->ordered == 0); + /* In a combined parallel loop, emit a call to + GOMP_loop_foo_next. */ + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + } + else + { + tree t0, t1, t2, t3, t4; + /* If this is not a combined parallel loop, emit a call to + GOMP_loop_foo_start in ENTRY_BB. */ + t4 = build_fold_addr_expr (iend0); + t3 = build_fold_addr_expr (istart0); + if (fd->ordered) + { + t0 = build_int_cst (unsigned_type_node, + fd->ordered - fd->collapse + 1); + arr = create_tmp_var (build_array_type_nelts (fd->iter_type, + fd->ordered + - fd->collapse + 1), + ".omp_counts"); + DECL_NAMELESS (arr) = 1; + TREE_ADDRESSABLE (arr) = 1; + TREE_STATIC (arr) = 1; + vec<constructor_elt, va_gc> *v; + vec_alloc (v, fd->ordered - fd->collapse + 1); + int idx; + + for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) + { + tree c; + if (idx == 0 && fd->collapse > 1) + c = fd->loop.n2; + else + c = counts[idx + fd->collapse - 1]; + tree purpose = size_int (idx); + CONSTRUCTOR_APPEND_ELT (v, purpose, c); + if (TREE_CODE (c) != INTEGER_CST) + TREE_STATIC (arr) = 0; + } + + DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); + if (!TREE_STATIC (arr)) + force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, + void_type_node, arr), + true, NULL_TREE, true, GSI_SAME_STMT); + t1 = build_fold_addr_expr (arr); + t2 = NULL_TREE; + } + else + { + t2 = fold_convert (fd->iter_type, fd->loop.step); + t1 = fd->loop.n2; + t0 = fd->loop.n1; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc + = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + t0 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + t1 = OMP_CLAUSE_DECL (innerc); + } + if (POINTER_TYPE_P (TREE_TYPE (t0)) + && TYPE_PRECISION (TREE_TYPE (t0)) + != TYPE_PRECISION (fd->iter_type)) + { + /* Avoid casting pointers to integer of a different size. 
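+   Go through a signed integer type of the pointer's own precision first
+   and only then convert to the iteration type.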
*/ + tree itype = signed_type_for (type); + t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); + t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); + } + else + { + t1 = fold_convert (fd->iter_type, t1); + t0 = fold_convert (fd->iter_type, t0); + } + if (bias) + { + t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); + t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); + } + } + if (fd->iter_type == long_integer_type_node || fd->ordered) + { + if (fd->chunk_size) + { + t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); + if (fd->ordered) + t = build_call_expr (builtin_decl_explicit (start_fn), + 5, t0, t1, t, t3, t4); + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 6, t0, t1, t2, t, t3, t4); + } + else if (fd->ordered) + t = build_call_expr (builtin_decl_explicit (start_fn), + 4, t0, t1, t3, t4); + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 5, t0, t1, t2, t3, t4); + } + else + { + tree t5; + tree c_bool_type; + tree bfn_decl; + + /* The GOMP_loop_ull_*start functions have additional boolean + argument, true for < loops and false for > loops. + In Fortran, the C bool type can be different from + boolean_type_node. */ + bfn_decl = builtin_decl_explicit (start_fn); + c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); + t5 = build_int_cst (c_bool_type, + fd->loop.cond_code == LT_EXPR ? 1 : 0); + if (fd->chunk_size) + { + tree bfn_decl = builtin_decl_explicit (start_fn); + t = fold_convert (fd->iter_type, fd->chunk_size); + t = omp_adjust_chunk_size (t, fd->simd_schedule); + t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); + } + else + t = build_call_expr (builtin_decl_explicit (start_fn), + 6, t5, t0, t1, t2, t3, t4); + } + } + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + if (arr && !TREE_STATIC (arr)) + { + tree clobber = build_constructor (TREE_TYPE (arr), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), + GSI_SAME_STMT); + } + gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + if (gsi_end_p (gsif)) + gsif = gsi_after_labels (gsi_bb (gsif)); + gsi_next (&gsif); + + /* Iteration setup for sequential loop goes in L0_BB. 
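+   That is, copy the chunk bounds the runtime returned in ISTART0/IEND0
+   into STARTVAR and IEND, undoing the bias above or, in the ordered(1)
+   case, scaling the logical iteration number by STEP and adding N1.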
*/ + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR + && gimple_omp_for_kind (inner_stmt) + == GF_OMP_FOR_KIND_SIMD); + tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + } + + gsi = gsi_start_bb (l0_bb); + t = istart0; + if (fd->ordered && fd->collapse == 1) + t = fold_build2 (MULT_EXPR, fd->iter_type, t, + fold_convert (fd->iter_type, fd->loop.step)); + else if (bias) + t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); + if (fd->ordered && fd->collapse == 1) + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, fold_convert (sizetype, t)); + else + { + t = fold_convert (TREE_TYPE (startvar), t); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, t); + } + } + else + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); + t = fold_convert (TREE_TYPE (startvar), t); + } + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = iend0; + if (fd->ordered && fd->collapse == 1) + t = fold_build2 (MULT_EXPR, fd->iter_type, t, + fold_convert (fd->iter_type, fd->loop.step)); + else if (bias) + t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); + if (fd->ordered && fd->collapse == 1) + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, fold_convert (sizetype, t)); + else + { + t = fold_convert (TREE_TYPE (startvar), t); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), + fd->loop.n1, t); + } + } + else + { + if (POINTER_TYPE_P (TREE_TYPE (startvar))) + t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); + t = fold_convert (TREE_TYPE (startvar), t); + } + iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, iend); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) + assign_stmt = gimple_build_assign (fd->loop.v, iend); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. 
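+   E.g. for
+
+     #pragma omp for linear(x:2)
+
+   each thread must enter its chunk with its private x equal to the
+   original value of x plus 2 times the number of logical iterations
+   preceding the chunk, which is (STARTVAR - N1) / STEP.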
*/ + tree itercnt = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + dest = unshare_expr (t); + tree v = create_tmp_var (TREE_TYPE (t), NULL); + expand_omp_build_assign (&gsif, v, t); + if (itercnt == NULL_TREE) + { + itercnt = startvar; + tree n1 = fd->loop.n1; + if (POINTER_TYPE_P (TREE_TYPE (itercnt))) + { + itercnt + = fold_convert (signed_type_for (TREE_TYPE (itercnt)), + itercnt); + n1 = fold_convert (TREE_TYPE (itercnt), n1); + } + itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), + itercnt, n1); + itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), + itercnt, fd->loop.step); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + a = fold_build2 (MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (fd->ordered) + { + /* Until now, counts array contained number of iterations or + variable containing it for ith loop. From now on, we need + those counts only for collapsed loops, and only for the 2nd + till the last collapsed one. Move those one element earlier, + we'll use counts[fd->collapse - 1] for the first source/sink + iteration counter and so on and counts[fd->ordered] + as the array holding the current counter values for + depend(source). 
*/ + if (fd->collapse > 1) + memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); + if (broken_loop) + { + int i; + for (i = fd->collapse; i < fd->ordered; i++) + { + tree type = TREE_TYPE (fd->loops[i].v); + tree this_cond + = fold_build2 (fd->loops[i].cond_code, boolean_type_node, + fold_convert (type, fd->loops[i].n1), + fold_convert (type, fd->loops[i].n2)); + if (!integer_onep (this_cond)) + break; + } + if (i < fd->ordered) + { + cont_bb + = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); + add_bb_to_loop (cont_bb, l1_bb->loop_father); + gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); + gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); + make_edge (cont_bb, l1_bb, 0); + l2_bb = create_empty_bb (cont_bb); + broken_loop = false; + } + } + expand_omp_ordered_source_sink (region, fd, counts, cont_bb); + cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, + ordered_lastprivate); + if (counts[fd->collapse - 1]) + { + gcc_assert (fd->collapse == 1); + gsi = gsi_last_bb (l0_bb); + expand_omp_build_assign (&gsi, counts[fd->collapse - 1], + istart0, true); + gsi = gsi_last_bb (cont_bb); + t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], + build_int_cst (fd->iter_type, 1)); + expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_zero_node, NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); + t = counts[fd->collapse - 1]; + } + else if (fd->collapse > 1) + t = fd->loop.v; + else + { + t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), + fd->loops[0].v, fd->loops[0].n1); + t = fold_convert (fd->iter_type, t); + } + gsi = gsi_last_bb (l0_bb); + tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], + size_zero_node, NULL_TREE, NULL_TREE); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, aref, t, true); + } + + if (!broken_loop) + { + /* Code to control the increment and predicate for the sequential + loop goes in the CONT_BB. */ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, fd->loop.step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (vback) + && TREE_ADDRESSABLE (vback), + NULL_TREE, true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) + { + if (fd->collapse > 1) + t = fd->loop.v; + else + { + t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), + fd->loops[0].v, fd->loops[0].n1); + t = fold_convert (fd->iter_type, t); + } + tree aref = build4 (ARRAY_REF, fd->iter_type, + counts[fd->ordered], size_zero_node, + NULL_TREE, NULL_TREE); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + expand_omp_build_assign (&gsi, aref, t); + } + + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) ? 
t : vback, + iend); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); + + /* Emit code to get the next parallel iteration in L2_BB. */ + gsi = gsi_start_bb (l2_bb); + + t = build_call_expr (builtin_decl_explicit (next_fn), 2, + build_fold_addr_expr (istart0), + build_fold_addr_expr (iend0)); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (TREE_TYPE (t) != boolean_type_node) + t = fold_build2 (NE_EXPR, boolean_type_node, + t, build_int_cst (TREE_TYPE (t), 0)); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + } + + /* Add the loop cleanup function. */ + gsi = gsi_last_bb (exit_bb); + if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); + else if (gimple_omp_return_lhs (gsi_stmt (gsi))) + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); + else + t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); + gcall *call_stmt = gimple_build_call (t, 0); + if (gimple_omp_return_lhs (gsi_stmt (gsi))) + gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); + gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); + if (fd->ordered) + { + tree arr = counts[fd->ordered]; + tree clobber = build_constructor (TREE_TYPE (arr), NULL); + TREE_THIS_VOLATILE (clobber) = 1; + gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), + GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect the new blocks. */ + find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; + find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; + + if (!broken_loop) + { + gimple_seq phis; + + e = find_edge (cont_bb, l3_bb); + ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); + + phis = phi_nodes (l3_bb); + for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *phi = gsi_stmt (gsi); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), + PHI_ARG_DEF_FROM_EDGE (phi, e)); + } + remove_edge (e); + + make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); + e = find_edge (cont_bb, l1_bb); + if (e == NULL) + { + e = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (e->dest) == l1_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (e); + e = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (e); + e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + e->flags = EDGE_TRUE_VALUE; + if (e) + { + e->probability = REG_BR_PROB_BASE * 7 / 8; + find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; + } + else + { + e = find_edge (cont_bb, l2_bb); + e->flags = EDGE_FALLTHRU; + } + make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); + + if (gimple_in_ssa_p (cfun)) + { + /* Add phis to the outer loop that connect to the phis in the inner, + original loop, and move the loop entry value of the inner phi to + the loop entry value of the outer phi. 
*/ + gphi_iterator psi; + for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) + { + source_location locus; + gphi *nphi; + gphi *exit_phi = psi.phi (); + + edge l2_to_l3 = find_edge (l2_bb, l3_bb); + tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); + + basic_block latch = BRANCH_EDGE (cont_bb)->dest; + edge latch_to_l1 = find_edge (latch, l1_bb); + gphi *inner_phi + = find_phi_with_arg_on_edge (exit_res, latch_to_l1); + + tree t = gimple_phi_result (exit_phi); + tree new_res = copy_ssa_name (t, NULL); + nphi = create_phi_node (new_res, l0_bb); + + edge l0_to_l1 = find_edge (l0_bb, l1_bb); + t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); + locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); + edge entry_to_l0 = find_edge (entry_bb, l0_bb); + add_phi_arg (nphi, t, entry_to_l0, locus); + + edge l2_to_l0 = find_edge (l2_bb, l0_bb); + add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); + + add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); + }; + } + + set_immediate_dominator (CDI_DOMINATORS, l2_bb, + recompute_dominator (CDI_DOMINATORS, l2_bb)); + set_immediate_dominator (CDI_DOMINATORS, l3_bb, + recompute_dominator (CDI_DOMINATORS, l3_bb)); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, + recompute_dominator (CDI_DOMINATORS, l0_bb)); + set_immediate_dominator (CDI_DOMINATORS, l1_bb, + recompute_dominator (CDI_DOMINATORS, l1_bb)); + + /* We enter expand_omp_for_generic with a loop. This original loop may + have its own loop struct, or it may be part of an outer loop struct + (which may be the fake loop). */ + struct loop *outer_loop = entry_bb->loop_father; + bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; + + add_bb_to_loop (l2_bb, outer_loop); + + /* We've added a new loop around the original loop. Allocate the + corresponding loop struct. */ + struct loop *new_loop = alloc_loop (); + new_loop->header = l0_bb; + new_loop->latch = l2_bb; + add_loop (new_loop, outer_loop); + + /* Allocate a loop structure for the original loop unless we already + had one. */ + if (!orig_loop_has_loop_struct + && !gimple_omp_for_combined_p (fd->for_stmt)) + { + struct loop *orig_loop = alloc_loop (); + orig_loop->header = l1_bb; + /* The loop may have multiple latches. */ + add_loop (orig_loop, new_loop); + } + } +} + +/* A subroutine of expand_omp_for. Generate code for a parallel + loop with static schedule and no specified chunk size. 
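+   The iteration space is divided into nthreads contiguous pieces whose
+   sizes differ by at most one, so no GOMP_loop_* calls are needed to
+   hand out work; e.g. with 4 threads and n = 10 iterations the threads
+   get 3, 3, 2 and 2 iterations respectively (q = 2, tt = 2 below).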
Given + parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; + if (cond is <) + adj = STEP - 1; + else + adj = STEP + 1; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; + q = n / nthreads; + tt = n % nthreads; + if (threadid < tt) goto L3; else goto L4; + L3: + tt = 0; + q = q + 1; + L4: + s0 = q * threadid + tt; + e0 = s0 + q; + V = s0 * STEP + N1; + if (s0 >= e0) goto L2; else goto L0; + L0: + e = e0 * STEP + N1; + L1: + BODY; + V += STEP; + if (V cond e) goto L1; + L2: +*/ + +static void +expand_omp_for_static_nochunk (struct omp_region *region, + struct omp_for_data *fd, + gimple *inner_stmt) +{ + tree n, q, s0, e0, e, t, tt, nthreads, threadid; + tree type, itype, vmain, vback; + basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; + basic_block body_bb, cont_bb, collapse_bb = NULL; + basic_block fin_bb; + gimple_stmt_iterator gsi; + edge ep; + bool broken_loop = region->cont == NULL; + tree *counts = NULL; + tree n1, n2, step; + + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + fin_bb = BRANCH_EDGE (entry_bb)->dest; + gcc_assert (broken_loop + || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); + seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); + body_bb = single_succ (seq_start_bb); + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb + || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + exit_bb = region->exit; + + /* Iteration space partitioning goes in ENTRY_BB. 
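+   That is, compute the total iteration count n and each thread's share
+   q/tt from the pseudocode above and, where needed, take the early exit
+   to FIN_BB when the loop body would not run at all.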
*/ + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block l2_dom_bb = NULL, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + fin_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + t = NULL_TREE; + } + else if (gimple_omp_for_combined_into_p (fd->for_stmt)) + t = integer_one_node; + else + t = fold_binary (fd->loop.cond_code, boolean_type_node, + fold_convert (type, fd->loop.n1), + fold_convert (type, fd->loop.n2)); + if (fd->collapse == 1 + && TYPE_UNSIGNED (type) + && (t == NULL_TREE || !integer_onep (t))) + { + n1 = fold_convert (type, unshare_expr (fd->loop.n1)); + n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (type, unshare_expr (fd->loop.n2)); + n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + ep = split_block (entry_bb, cond_stmt); + ep->flags = EDGE_TRUE_VALUE; + entry_bb = ep->dest; + ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); + ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); + ep->probability = REG_BR_PROB_BASE / 2000 - 1; + if (gimple_in_ssa_p (cfun)) + { + int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; + for (gphi_iterator gpi = gsi_start_phis (fin_bb); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), + ep, UNKNOWN_LOCATION); + } + } + gsi = gsi_last_bb (entry_bb); + } + + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, + true, GSI_SAME_STMT); + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + step = fd->loop.step; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, 
GSI_SAME_STMT); + + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (itype, t); + n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + q = create_tmp_reg (itype, "q"); + t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); + gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); + + tt = create_tmp_reg (itype, "tt"); + t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); + gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); + + t = build2 (LT_EXPR, boolean_type_node, threadid, tt); + gcond *cond_stmt = gimple_build_cond_empty (t); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + + second_bb = split_block (entry_bb, cond_stmt)->dest; + gsi = gsi_last_bb (second_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), + GSI_SAME_STMT); + gassign *assign_stmt + = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + third_bb = split_block (second_bb, assign_stmt)->dest; + gsi = gsi_last_bb (third_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + t = build2 (MULT_EXPR, itype, q, threadid); + t = build2 (PLUS_EXPR, itype, t, tt); + s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + t = fold_build2 (PLUS_EXPR, itype, s0, q); + e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); + + t = build2 (GE_EXPR, boolean_type_node, s0, e0); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + /* Setup code for sequential iteration goes in SEQ_START_BB. */ + gsi = gsi_start_bb (seq_start_bb); + + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL + ? gimple_omp_parallel_clauses (inner_stmt) + : gimple_omp_for_clauses (inner_stmt); + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST + && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) + { + int i; + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (distribute parallel for with lastprivate), + propagate down the total number of iterations. 
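+   The inner construct only sees its own chunk as bounds, but lastprivate
+   has to be computed from the globally last iteration, so the original
+   iteration count is passed down through one more _looptemp_ clause.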
*/ + tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), + fd->loop.n2); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + } + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) + assign_stmt = gimple_build_assign (fd->loop.v, e); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. */ + tree itercnt = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + if (itercnt == NULL_TREE) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + itercnt = fold_build2 (MINUS_EXPR, itype, + fold_convert (itype, n1), + fold_convert (itype, fd->loop.n1)); + itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); + itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + else + itercnt = s0; + } + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + a = fold_build2 (MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + dest = unshare_expr (t); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (!broken_loop) + { + /* The code controlling the sequential loop replaces the + GIMPLE_OMP_CONTINUE. 
*/ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (vback) + && TREE_ADDRESSABLE (vback), + NULL_TREE, true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) + ? t : vback, e); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + } + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + } + + /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ + gsi = gsi_last_bb (exit_bb); + if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) + { + t = gimple_omp_return_lhs (gsi_stmt (gsi)); + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect all the blocks. */ + ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); + ep->probability = REG_BR_PROB_BASE / 4 * 3; + ep = find_edge (entry_bb, second_bb); + ep->flags = EDGE_TRUE_VALUE; + ep->probability = REG_BR_PROB_BASE / 4; + find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; + find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; + + if (!broken_loop) + { + ep = find_edge (cont_bb, body_bb); + if (ep == NULL) + { + ep = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (ep->dest) == body_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (ep); + ep = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (ep); + ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + ep->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, fin_bb)->flags + = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; + } + + set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); + + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + + struct loop *loop = body_bb->loop_father; + if (loop != entry_bb->loop_father) + { + gcc_assert (broken_loop || loop->header == body_bb); + gcc_assert (broken_loop + || loop->latch == region->cont + || single_pred (loop->latch) == region->cont); + return; + } + + if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) + { + loop = alloc_loop (); + loop->header = body_bb; + if (collapse_bb == NULL) + loop->latch = cont_bb; + add_loop (loop, body_bb->loop_father); + } +} + +/* Return phi in E->DEST with ARG on edge E. */ + +static gphi * +find_phi_with_arg_on_edge (tree arg, edge e) +{ + basic_block bb = e->dest; + + for (gphi_iterator gpi = gsi_start_phis (bb); + !gsi_end_p (gpi); + gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) + return phi; + } + + return NULL; +} + +/* A subroutine of expand_omp_for. 
Generate code for a parallel + loop with static schedule and a specified chunk size. Given + parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; + if (cond is <) + adj = STEP - 1; + else + adj = STEP + 1; + if ((__typeof (V)) -1 > 0 && cond is >) + n = -(adj + N2 - N1) / -STEP; + else + n = (adj + N2 - N1) / STEP; + trip = 0; + V = threadid * CHUNK * STEP + N1; -- this extra definition of V is + here so that V is defined + if the loop is not entered + L0: + s0 = (trip * nthreads + threadid) * CHUNK; + e0 = min(s0 + CHUNK, n); + if (s0 < n) goto L1; else goto L4; + L1: + V = s0 * STEP + N1; + e = e0 * STEP + N1; + L2: + BODY; + V += STEP; + if (V cond e) goto L2; else goto L3; + L3: + trip += 1; + goto L0; + L4: +*/ + +static void +expand_omp_for_static_chunk (struct omp_region *region, + struct omp_for_data *fd, gimple *inner_stmt) +{ + tree n, s0, e0, e, t; + tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; + tree type, itype, vmain, vback, vextra; + basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; + basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; + gimple_stmt_iterator gsi; + edge se; + bool broken_loop = region->cont == NULL; + tree *counts = NULL; + tree n1, n2, step; + + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + entry_bb = region->entry; + se = split_block (entry_bb, last_stmt (entry_bb)); + entry_bb = se->src; + iter_part_bb = se->dest; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); + fin_bb = BRANCH_EDGE (iter_part_bb)->dest; + gcc_assert (broken_loop + || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); + seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); + body_bb = single_succ (seq_start_bb); + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb + || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); + } + exit_bb = region->exit; + + /* Trip and adjustment setup goes in ENTRY_BB. 
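+   TRIP counts how many rounds of chunks have been handed out so far; in
+   round TRIP, thread THREADID executes the logical iterations from
+   s0 = (TRIP * nthreads + THREADID) * CHUNK up to e0 = min (s0 + CHUNK, n).
+   E.g. with 4 threads and schedule(static, 2), thread 1 executes
+   iterations 2-3, then 10-11, then 18-19, and so on.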
*/ + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block l2_dom_bb = NULL, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + fin_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + t = NULL_TREE; + } + else if (gimple_omp_for_combined_into_p (fd->for_stmt)) + t = integer_one_node; + else + t = fold_binary (fd->loop.cond_code, boolean_type_node, + fold_convert (type, fd->loop.n1), + fold_convert (type, fd->loop.n2)); + if (fd->collapse == 1 + && TYPE_UNSIGNED (type) + && (t == NULL_TREE || !integer_onep (t))) + { + n1 = fold_convert (type, unshare_expr (fd->loop.n1)); + n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, + true, GSI_SAME_STMT); + n2 = fold_convert (type, unshare_expr (fd->loop.n2)); + n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, + true, GSI_SAME_STMT); + gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), + expand_omp_regimplify_p, NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + se = split_block (entry_bb, cond_stmt); + se->flags = EDGE_TRUE_VALUE; + entry_bb = se->dest; + se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); + se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); + se->probability = REG_BR_PROB_BASE / 2000 - 1; + if (gimple_in_ssa_p (cfun)) + { + int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; + for (gphi_iterator gpi = gsi_start_phis (fin_bb); + !gsi_end_p (gpi); gsi_next (&gpi)) + { + gphi *phi = gpi.phi (); + add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), + se, UNKNOWN_LOCATION); + } + } + gsi = gsi_last_bb (entry_bb); + } + + switch (gimple_omp_for_kind (fd->for_stmt)) + { + case GF_OMP_FOR_KIND_FOR: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); + break; + case GF_OMP_FOR_KIND_DISTRIBUTE: + nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); + threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); + break; + default: + gcc_unreachable (); + } + nthreads = build_call_expr (nthreads, 0); + nthreads = fold_convert (itype, nthreads); + nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, + true, GSI_SAME_STMT); + threadid = build_call_expr (threadid, 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + step = fd->loop.step; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, 
GSI_SAME_STMT); + tree chunk_size = fold_convert (itype, fd->chunk_size); + chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); + chunk_size + = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, + GSI_SAME_STMT); + + t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); + if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + t = fold_convert (itype, t); + n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + + trip_var = create_tmp_reg (itype, ".trip"); + if (gimple_in_ssa_p (cfun)) + { + trip_init = make_ssa_name (trip_var); + trip_main = make_ssa_name (trip_var); + trip_back = make_ssa_name (trip_var); + } + else + { + trip_init = trip_var; + trip_main = trip_var; + trip_back = trip_var; + } + + gassign *assign_stmt + = gimple_build_assign (trip_init, build_int_cst (itype, 0)); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR. */ + gsi_remove (&gsi, true); + + gimple_stmt_iterator gsif = gsi; + + /* Iteration space partitioning goes in ITER_PART_BB. */ + gsi = gsi_last_bb (iter_part_bb); + + t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); + t = fold_build2 (PLUS_EXPR, itype, t, threadid); + t = fold_build2 (MULT_EXPR, itype, t, chunk_size); + s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); + t = fold_build2 (MIN_EXPR, itype, t, n); + e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + + t = build2 (LT_EXPR, boolean_type_node, s0, n); + gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); + + /* Setup code for sequential iteration goes in SEQ_START_BB. */ + gsi = gsi_start_bb (seq_start_bb); + + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL + ? gimple_omp_parallel_clauses (inner_stmt) + : gimple_omp_for_clauses (inner_stmt); + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST + && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) + { + int i; + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (distribute parallel for with lastprivate), + propagate down the total number of iterations. 
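   A minimal example (a sketch) of the distribute parallel for case this
   covers:

     int last;
     #pragma omp target teams distribute parallel for lastprivate (last)
     for (int i = 0; i < n; i++)
       last = i;

   The inner worksharing loop only sees the bounds of the piece handed to
   its team, so it needs the overall iteration count to recognize which
   thread runs the sequentially last iteration and must write LAST back.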
*/ + tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), + fd->loop.n2); + t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + } + + t = fold_convert (itype, s0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = fold_convert (itype, e0); + t = fold_build2 (MULT_EXPR, itype, t, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (TREE_TYPE (startvar), t); + e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) + assign_stmt = gimple_build_assign (fd->loop.v, e); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + /* Handle linear clause adjustments. */ + tree itercnt = NULL_TREE, itercntbias = NULL_TREE; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) + for (tree c = gimple_omp_for_clauses (fd->for_stmt); + c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR + && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) + { + tree d = OMP_CLAUSE_DECL (c); + bool is_ref = omp_is_reference (d); + tree t = d, a, dest; + if (is_ref) + t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); + tree type = TREE_TYPE (t); + if (POINTER_TYPE_P (type)) + type = sizetype; + dest = unshare_expr (t); + tree v = create_tmp_var (TREE_TYPE (t), NULL); + expand_omp_build_assign (&gsif, v, t); + if (itercnt == NULL_TREE) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + itercntbias + = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), + fold_convert (itype, fd->loop.n1)); + itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, + itercntbias, step); + itercntbias + = force_gimple_operand_gsi (&gsif, itercntbias, true, + NULL_TREE, true, + GSI_SAME_STMT); + itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); + itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + } + else + itercnt = s0; + } + a = fold_build2 (MULT_EXPR, type, + fold_convert (type, itercnt), + fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); + t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR + : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (dest, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (!broken_loop) + { + /* The code controlling the sequential loop goes in CONT_BB, + replacing the GIMPLE_OMP_CONTINUE. 
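   The OMP_CLAUSE_LINEAR adjustment a few statements above serves, at the
   source level, loops such as the following sketch, where x and the step 3
   are arbitrary and use () stands for the loop body; on entry to each chunk
   the privatized copy of x is re-seeded from its incoming value plus s0
   times the linear step:

     int x = 0;
     #pragma omp parallel for schedule (static, 4) linear (x:3)
     for (int i = 0; i < n; i++)
       {
         use (i, x);
         x += 3;
       }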
*/ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, step); + if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) + t = build2 (EQ_EXPR, boolean_type_node, + build_int_cst (itype, 0), + build_int_cst (itype, 1)); + else + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) + ? t : vback, e); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + + /* Trip update code goes into TRIP_UPDATE_BB. */ + gsi = gsi_start_bb (trip_update_bb); + + t = build_int_cst (itype, 1); + t = build2 (PLUS_EXPR, itype, trip_main, t); + assign_stmt = gimple_build_assign (trip_back, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + + /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ + gsi = gsi_last_bb (exit_bb); + if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) + { + t = gimple_omp_return_lhs (gsi_stmt (gsi)); + gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT); + } + gsi_remove (&gsi, true); + + /* Connect the new blocks. */ + find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; + find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; + + if (!broken_loop) + { + se = find_edge (cont_bb, body_bb); + if (se == NULL) + { + se = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (se->dest) == body_bb); + } + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (se); + se = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (se); + se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + se->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, trip_update_bb)->flags + = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; + + redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); + } + + if (gimple_in_ssa_p (cfun)) + { + gphi_iterator psi; + gphi *phi; + edge re, ene; + edge_var_map *vm; + size_t i; + + gcc_assert (fd->collapse == 1 && !broken_loop); + + /* When we redirect the edge from trip_update_bb to iter_part_bb, we + remove arguments of the phi nodes in fin_bb. We need to create + appropriate phi nodes in iter_part_bb instead. 
*/ + se = find_edge (iter_part_bb, fin_bb); + re = single_succ_edge (trip_update_bb); + vec<edge_var_map> *head = redirect_edge_var_map_vector (re); + ene = single_succ_edge (entry_bb); + + psi = gsi_start_phis (fin_bb); + for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); + gsi_next (&psi), ++i) + { + gphi *nphi; + source_location locus; + + phi = psi.phi (); + t = gimple_phi_result (phi); + gcc_assert (t == redirect_edge_var_map_result (vm)); + + if (!single_pred_p (fin_bb)) + t = copy_ssa_name (t, phi); + + nphi = create_phi_node (t, iter_part_bb); + + t = PHI_ARG_DEF_FROM_EDGE (phi, se); + locus = gimple_phi_arg_location_from_edge (phi, se); + + /* A special case -- fd->loop.v is not yet computed in + iter_part_bb, we need to use vextra instead. */ + if (t == fd->loop.v) + t = vextra; + add_phi_arg (nphi, t, ene, locus); + locus = redirect_edge_var_map_location (vm); + tree back_arg = redirect_edge_var_map_def (vm); + add_phi_arg (nphi, back_arg, re, locus); + edge ce = find_edge (cont_bb, body_bb); + if (ce == NULL) + { + ce = BRANCH_EDGE (cont_bb); + gcc_assert (single_succ (ce->dest) == body_bb); + ce = single_succ_edge (ce->dest); + } + gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); + gcc_assert (inner_loop_phi != NULL); + add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), + find_edge (seq_start_bb, body_bb), locus); + + if (!single_pred_p (fin_bb)) + add_phi_arg (phi, gimple_phi_result (nphi), se, locus); + } + gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); + redirect_edge_var_map_clear (re); + if (single_pred_p (fin_bb)) + while (1) + { + psi = gsi_start_phis (fin_bb); + if (gsi_end_p (psi)) + break; + remove_phi_node (&psi, false); + } + + /* Make phi node for trip. */ + phi = create_phi_node (trip_main, iter_part_bb); + add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), + UNKNOWN_LOCATION); + add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), + UNKNOWN_LOCATION); + } + + if (!broken_loop) + set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, + recompute_dominator (CDI_DOMINATORS, iter_part_bb)); + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, + recompute_dominator (CDI_DOMINATORS, seq_start_bb)); + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + + if (!broken_loop) + { + struct loop *loop = body_bb->loop_father; + struct loop *trip_loop = alloc_loop (); + trip_loop->header = iter_part_bb; + trip_loop->latch = trip_update_bb; + add_loop (trip_loop, iter_part_bb->loop_father); + + if (loop != entry_bb->loop_father) + { + gcc_assert (loop->header == body_bb); + gcc_assert (loop->latch == region->cont + || single_pred (loop->latch) == region->cont); + trip_loop->inner = loop; + return; + } + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + loop = alloc_loop (); + loop->header = body_bb; + if (collapse_bb == NULL) + loop->latch = cont_bb; + add_loop (loop, trip_loop); + } + } +} + +/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. + Given parameters: + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">" or "!=", we generate pseudocode + + for (ind_var = low; ind_var < high; ind_var++) + { + V = n1 + (ind_var * STEP) + + <BODY> + } + + In the above pseudocode, low and high are function parameters of the + child function. 
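   For instance, for

     _Cilk_for (long v = a; v < b; v += s)
       body (v);

   the outlined child roughly has the shape (names illustrative only)

     static void
     child_fn (void *data, long low, long high)
     {
       for (long ind_var = low; ind_var < high; ind_var++)
         body (a + ind_var * s);   /* a, s reach the child through *data;
                                      V = N1 + ind_var * STEP */
     }

   with the Cilk runtime choosing the [low, high) subranges passed to each
   invocation.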
In the function below, we are inserting a temp. + variable that will be making a call to two OMP functions that will not be + found in the body of _Cilk_for (since OMP_FOR cannot be mixed + with _Cilk_for). These functions are replaced with low and high + by the function that handles taskreg. */ + + +static void +expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) +{ + bool broken_loop = region->cont == NULL; + basic_block entry_bb = region->entry; + basic_block cont_bb = region->cont; + + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; + basic_block l1_bb, l2_bb; + + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; + l2_bb = BRANCH_EDGE (entry_bb)->dest; + } + else + { + BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; + l1_bb = split_edge (BRANCH_EDGE (entry_bb)); + l2_bb = single_succ (l1_bb); + } + basic_block exit_bb = region->exit; + basic_block l2_dom_bb = NULL; + + gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); + + /* Below statements until the "tree high_val = ..." are pseudo statements + used to pass information to be used by expand_omp_taskreg. + low_val and high_val will be replaced by the __low and __high + parameter from the child function. + + The call_exprs part is a place-holder, it is mainly used + to distinctly identify to the top-level part that this is + where we should put low and high (reasoning given in header + comment). */ + + tree child_fndecl + = gimple_omp_parallel_child_fn ( + as_a <gomp_parallel *> (last_stmt (region->outer->entry))); + tree t, low_val = NULL_TREE, high_val = NULL_TREE; + for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) + { + if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) + high_val = t; + else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) + low_val = t; + } + gcc_assert (low_val && high_val); + + tree type = TREE_TYPE (low_val); + tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + /* Not needed in SSA form right now. */ + gcc_assert (!gimple_in_ssa_p (cfun)); + if (l2_dom_bb == NULL) + l2_dom_bb = l1_bb; + + tree n1 = low_val; + tree n2 = high_val; + + gimple *stmt = gimple_build_assign (ind_var, n1); + + /* Replace the GIMPLE_OMP_FOR statement. */ + gsi_replace (&gsi, stmt, true); + + if (!broken_loop) + { + /* Code to control the increment goes in the CONT_BB. */ + gsi = gsi_last_bb (cont_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); + stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, + build_one_cst (type)); + + /* Replace GIMPLE_OMP_CONTINUE. */ + gsi_replace (&gsi, stmt, true); + } + + /* Emit the condition in L1_BB. 
*/ + gsi = gsi_after_labels (l1_bb); + t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), + fold_convert (TREE_TYPE (fd->loop.step), ind_var), + fd->loop.step); + if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) + t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), + fd->loop.n1, fold_convert (sizetype, t)); + else + t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), + fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); + t = fold_convert (TREE_TYPE (fd->loop.v), t); + expand_omp_build_assign (&gsi, fd->loop.v, t); + + /* The condition is always '<' since the runtime will fill in the low + and high values. */ + stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + + /* Remove GIMPLE_OMP_RETURN. */ + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + /* Connect the new blocks. */ + remove_edge (FALLTHRU_EDGE (entry_bb)); + + edge e, ne; + if (!broken_loop) + { + remove_edge (BRANCH_EDGE (entry_bb)); + make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); + + e = BRANCH_EDGE (l1_bb); + ne = FALLTHRU_EDGE (l1_bb); + e->flags = EDGE_TRUE_VALUE; + } + else + { + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + ne = single_succ_edge (l1_bb); + e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); + + } + ne->flags = EDGE_FALSE_VALUE; + e->probability = REG_BR_PROB_BASE * 7 / 8; + ne->probability = REG_BR_PROB_BASE / 8; + + set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + + if (!broken_loop) + { + struct loop *loop = alloc_loop (); + loop->header = l1_bb; + loop->latch = cont_bb; + add_loop (loop, l1_bb->loop_father); + loop->safelen = INT_MAX; + } + + /* Pick the correct library function based on the precision of the + induction variable type. */ + tree lib_fun = NULL_TREE; + if (TYPE_PRECISION (type) == 32) + lib_fun = cilk_for_32_fndecl; + else if (TYPE_PRECISION (type) == 64) + lib_fun = cilk_for_64_fndecl; + else + gcc_unreachable (); + + gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); + + /* WS_ARGS contains the library function flavor to call: + __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the + user-defined grain value. If the user does not define one, then zero + is passed in by the parser. */ + vec_alloc (region->ws_args, 2); + region->ws_args->quick_push (lib_fun); + region->ws_args->quick_push (fd->chunk_size); +} + +/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing + loop. Given parameters: + + for (V = N1; V cond N2; V += STEP) BODY; + + where COND is "<" or ">", we generate pseudocode + + V = N1; + goto L1; + L0: + BODY; + V += STEP; + L1: + if (V cond N2) goto L0; else goto L2; + L2: + + For collapsed loops, given parameters: + collapse(3) + for (V1 = N11; V1 cond1 N12; V1 += STEP1) + for (V2 = N21; V2 cond2 N22; V2 += STEP2) + for (V3 = N31; V3 cond3 N32; V3 += STEP3) + BODY; + + we generate pseudocode + + if (cond3 is <) + adj = STEP3 - 1; + else + adj = STEP3 + 1; + count3 = (adj + N32 - N31) / STEP3; + if (cond2 is <) + adj = STEP2 - 1; + else + adj = STEP2 + 1; + count2 = (adj + N22 - N21) / STEP2; + if (cond1 is <) + adj = STEP1 - 1; + else + adj = STEP1 + 1; + count1 = (adj + N12 - N11) / STEP1; + count = count1 * count2 * count3; + V = 0; + V1 = N11; + V2 = N21; + V3 = N31; + goto L1; + L0: + BODY; + V += 1; + V3 += STEP3; + V2 += (V3 cond3 N32) ? 0 : STEP2; + V3 = (V3 cond3 N32) ? 
V3 : N31; + V1 += (V2 cond2 N22) ? 0 : STEP1; + V2 = (V2 cond2 N22) ? V2 : N21; + L1: + if (V < count) goto L0; else goto L2; + L2: + + */ + +static void +expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) +{ + tree type, t; + basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; + gimple_stmt_iterator gsi; + gimple *stmt; + gcond *cond_stmt; + bool broken_loop = region->cont == NULL; + edge e, ne; + tree *counts = NULL; + int i; + int safelen_int = INT_MAX; + tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE_SAFELEN); + tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__SIMDUID_); + tree n1, n2; + + if (safelen) + { + safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); + if (TREE_CODE (safelen) != INTEGER_CST) + safelen_int = 0; + else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) + safelen_int = tree_to_uhwi (safelen); + if (safelen_int == 1) + safelen_int = 0; + } + type = TREE_TYPE (fd->loop.v); + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (broken_loop + || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + l0_bb = FALLTHRU_EDGE (entry_bb)->dest; + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; + l2_bb = BRANCH_EDGE (entry_bb)->dest; + } + else + { + BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; + l1_bb = split_edge (BRANCH_EDGE (entry_bb)); + l2_bb = single_succ (l1_bb); + } + exit_bb = region->exit; + l2_dom_bb = NULL; + + gsi = gsi_last_bb (entry_bb); + + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + /* Not needed in SSA form right now. */ + gcc_assert (!gimple_in_ssa_p (cfun)); + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + } + if (l2_dom_bb == NULL) + l2_dom_bb = l1_bb; + + n1 = fd->loop.n1; + n2 = fd->loop.n2; + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + } + tree step = fd->loop.step; + + bool is_simt = (safelen_int > 1 + && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__SIMT_)); + tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; + if (is_simt) + { + cfun->curr_properties &= ~PROP_gimple_lomp_dev; + simt_lane = create_tmp_var (unsigned_type_node); + gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); + gimple_call_set_lhs (g, simt_lane); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, + fold_convert (TREE_TYPE (step), simt_lane)); + n1 = fold_convert (type, n1); + if (POINTER_TYPE_P (type)) + n1 = fold_build_pointer_plus (n1, offset); + else + n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); + + /* Collapsed loops not handled for SIMT yet: limit to one lane only. 
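   The adjustment of N1 and STEP just above gives every SIMT lane an
   interleaved slice of the iteration space; conceptually, with lane and vf
   standing for the values returned by the internal functions,

     for (V = N1 + STEP * lane; V cond N2; V += STEP * vf)
       BODY;

   so lane 0 starts at N1, lane 1 at N1 + STEP, and so on.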
*/ + if (fd->collapse > 1) + simt_maxlane = build_one_cst (unsigned_type_node); + else if (safelen_int < omp_max_simt_vf ()) + simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); + tree vf + = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, + unsigned_type_node, 0); + if (simt_maxlane) + vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); + vf = fold_convert (TREE_TYPE (step), vf); + step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); + } + + expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); + if (fd->collapse > 1) + { + if (gimple_omp_for_combined_into_p (fd->for_stmt)) + { + gsi_prev (&gsi); + expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); + gsi_next (&gsi); + } + else + for (i = 0; i < fd->collapse; i++) + { + tree itype = TREE_TYPE (fd->loops[i].v); + if (POINTER_TYPE_P (itype)) + itype = signed_type_for (itype); + t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + } + } + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi_remove (&gsi, true); + + if (!broken_loop) + { + /* Code to control the increment goes in the CONT_BB. */ + gsi = gsi_last_bb (cont_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); + + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + + if (fd->collapse > 1) + { + i = fd->collapse - 1; + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) + { + t = fold_convert (sizetype, fd->loops[i].step); + t = fold_build_pointer_plus (fd->loops[i].v, t); + } + else + { + t = fold_convert (TREE_TYPE (fd->loops[i].v), + fd->loops[i].step); + t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), + fd->loops[i].v, t); + } + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + + for (i = fd->collapse - 1; i > 0; i--) + { + tree itype = TREE_TYPE (fd->loops[i].v); + tree itype2 = TREE_TYPE (fd->loops[i - 1].v); + if (POINTER_TYPE_P (itype2)) + itype2 = signed_type_for (itype2); + t = build3 (COND_EXPR, itype2, + build2 (fd->loops[i].cond_code, boolean_type_node, + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n2)), + build_int_cst (itype2, 0), + fold_convert (itype2, fd->loops[i - 1].step)); + if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) + t = fold_build_pointer_plus (fd->loops[i - 1].v, t); + else + t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); + expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); + + t = build3 (COND_EXPR, itype, + build2 (fd->loops[i].cond_code, boolean_type_node, + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n2)), + fd->loops[i].v, + fold_convert (itype, fd->loops[i].n1)); + expand_omp_build_assign (&gsi, fd->loops[i].v, t); + } + } + + /* Remove GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + } + + /* Emit the condition in L1_BB. 
*/ + gsi = gsi_start_bb (l1_bb); + + t = fold_convert (type, n2); + t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + tree v = fd->loop.v; + if (DECL_P (v) && TREE_ADDRESSABLE (v)) + v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + t = build2 (fd->loop.cond_code, boolean_type_node, v, t); + cond_stmt = gimple_build_cond_empty (t); + gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); + if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, + NULL, NULL) + || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, + NULL, NULL)) + { + gsi = gsi_for_stmt (cond_stmt); + gimple_regimplify_operands (cond_stmt, &gsi); + } + + /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ + if (is_simt) + { + gsi = gsi_start_bb (l2_bb); + step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (fd->loop.v, step); + else + t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); + expand_omp_build_assign (&gsi, fd->loop.v, t); + } + + /* Remove GIMPLE_OMP_RETURN. */ + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + /* Connect the new blocks. */ + remove_edge (FALLTHRU_EDGE (entry_bb)); + + if (!broken_loop) + { + remove_edge (BRANCH_EDGE (entry_bb)); + make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); + + e = BRANCH_EDGE (l1_bb); + ne = FALLTHRU_EDGE (l1_bb); + e->flags = EDGE_TRUE_VALUE; + } + else + { + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + ne = single_succ_edge (l1_bb); + e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); + + } + ne->flags = EDGE_FALSE_VALUE; + e->probability = REG_BR_PROB_BASE * 7 / 8; + ne->probability = REG_BR_PROB_BASE / 8; + + set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); + set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); + + if (simt_maxlane) + { + cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, + NULL_TREE, NULL_TREE); + gsi = gsi_last_bb (entry_bb); + gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); + make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); + FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; + BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; + l2_dom_bb = entry_bb; + } + set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); + + if (!broken_loop) + { + struct loop *loop = alloc_loop (); + loop->header = l1_bb; + loop->latch = cont_bb; + add_loop (loop, l1_bb->loop_father); + loop->safelen = safelen_int; + if (simduid) + { + loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); + cfun->has_simduid_loops = true; + } + /* If not -fno-tree-loop-vectorize, hint that we want to vectorize + the loop. */ + if ((flag_tree_loop_vectorize + || (!global_options_set.x_flag_tree_loop_vectorize + && !global_options_set.x_flag_tree_vectorize)) + && flag_tree_loop_optimize + && loop->safelen > 1) + { + loop->force_vectorize = true; + cfun->has_force_vectorize_loops = true; + } + } + else if (simduid) + cfun->has_simduid_loops = true; +} + +/* Taskloop construct is represented after gimplification with + two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched + in between them. This routine expands the outer GIMPLE_OMP_FOR, + which should just compute all the needed loop temporaries + for GIMPLE_OMP_TASK. 
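   For example (a sketch), a user loop

     #pragma omp taskloop
     for (long i = a; i < b; i++)
       body (i);

   arrives here as an outer GIMPLE_OMP_FOR whose only job is to evaluate a
   and b into the _looptemp_ variables, a GIMPLE_OMP_TASK that turns into
   the GOMP_taskloop call splitting [a, b) into per-task ranges, and an
   inner GIMPLE_OMP_FOR (handled by the next function) that walks the
   single range each task receives.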
*/ + +static void +expand_omp_taskloop_for_outer (struct omp_region *region, + struct omp_for_data *fd, + gimple *inner_stmt) +{ + tree type, bias = NULL_TREE; + basic_block entry_bb, cont_bb, exit_bb; + gimple_stmt_iterator gsi; + gassign *assign_stmt; + tree *counts = NULL; + int i; + + gcc_assert (inner_stmt); + gcc_assert (region->cont); + gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK + && gimple_omp_task_taskloop_p (inner_stmt)); + type = TREE_TYPE (fd->loop.v); + + /* See if we need to bias by LLONG_MIN. */ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type)) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); + exit_bb = region->exit; + + gsi = gsi_last_bb (entry_bb); + gimple *for_stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + zero_iter_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + + if (zero_iter_bb) + { + /* Some counts[i] vars might be uninitialized if + some loop has zero iterations. But the body shouldn't + be executed in that case, so just avoid uninit warnings. */ + for (i = first_zero_iter; i < fd->collapse; i++) + if (SSA_VAR_P (counts[i])) + TREE_NO_WARNING (counts[i]) = 1; + gsi_prev (&gsi); + edge e = split_block (entry_bb, gsi_stmt (gsi)); + entry_bb = e->dest; + make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); + gsi = gsi_last_bb (entry_bb); + set_immediate_dominator (CDI_DOMINATORS, entry_bb, + get_immediate_dominator (CDI_DOMINATORS, + zero_iter_bb)); + } + } + + tree t0, t1; + t1 = fd->loop.n2; + t0 = fd->loop.n1; + if (POINTER_TYPE_P (TREE_TYPE (t0)) + && TYPE_PRECISION (TREE_TYPE (t0)) + != TYPE_PRECISION (fd->iter_type)) + { + /* Avoid casting pointers to integer of a different size. 
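   The conversion therefore goes through a signed integer of pointer width
   first; at the C level this is the difference between writing, say,

     t0 = (unsigned long long) (intptr_t) p;

   and converting the pointer straight to the differently sized iteration
   type (illustrative only).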
*/ + tree itype = signed_type_for (type); + t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); + t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); + } + else + { + t1 = fold_convert (fd->iter_type, t1); + t0 = fold_convert (fd->iter_type, t0); + } + if (bias) + { + t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); + t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); + } + + tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + tree startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + tree endvar = OMP_CLAUSE_DECL (innerc); + if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) + { + gcc_assert (innerc); + for (i = 1; i < fd->collapse; i++) + { + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + { + /* If needed (inner taskloop has lastprivate clause), propagate + down the total number of iterations. */ + tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, + NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + } + + t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (startvar, t0); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, + GSI_CONTINUE_LINKING); + assign_stmt = gimple_build_assign (endvar, t1); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (fd->collapse > 1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi = gsi_for_stmt (for_stmt); + gsi_remove (&gsi, true); + + gsi = gsi_last_bb (cont_bb); + gsi_remove (&gsi, true); + + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; + remove_edge (BRANCH_EDGE (entry_bb)); + FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE; + remove_edge (BRANCH_EDGE (cont_bb)); + set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); + set_immediate_dominator (CDI_DOMINATORS, region->entry, + recompute_dominator (CDI_DOMINATORS, region->entry)); +} + +/* Taskloop construct is represented after gimplification with + two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched + in between them. This routine expands the inner GIMPLE_OMP_FOR. + GOMP_taskloop{,_ull} function arranges for each task to be given just + a single range of iterations. */ + +static void +expand_omp_taskloop_for_inner (struct omp_region *region, + struct omp_for_data *fd, + gimple *inner_stmt) +{ + tree e, t, type, itype, vmain, vback, bias = NULL_TREE; + basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; + basic_block fin_bb; + gimple_stmt_iterator gsi; + edge ep; + bool broken_loop = region->cont == NULL; + tree *counts = NULL; + tree n1, n2, step; + + itype = type = TREE_TYPE (fd->loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + /* See if we need to bias by LLONG_MIN. 
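   The bias is the usual trick for pushing a signed iteration space through
   the unsigned long long runtime interface: adding the type's minimum value
   maps the signed range onto the unsigned range while preserving order,
   e.g. (a sketch)

     unsigned long long ub = (unsigned long long) v
                             + (unsigned long long) LLONG_MIN;

   so v1 < v2 as signed values exactly when ub1 < ub2 as unsigned ones; the
   bias is undone again when the bounds come back from the runtime.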
*/ + if (fd->iter_type == long_long_unsigned_type_node + && TREE_CODE (type) == INTEGER_TYPE + && !TYPE_UNSIGNED (type)) + { + tree n1, n2; + + if (fd->loop.cond_code == LT_EXPR) + { + n1 = fd->loop.n1; + n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); + } + else + { + n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); + n2 = fd->loop.n1; + } + if (TREE_CODE (n1) != INTEGER_CST + || TREE_CODE (n2) != INTEGER_CST + || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) + bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); + } + + entry_bb = region->entry; + cont_bb = region->cont; + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); + fin_bb = BRANCH_EDGE (entry_bb)->dest; + gcc_assert (broken_loop + || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); + body_bb = FALLTHRU_EDGE (entry_bb)->dest; + if (!broken_loop) + { + gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); + gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); + } + exit_bb = region->exit; + + /* Iteration space partitioning goes in ENTRY_BB. */ + gsi = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); + + if (fd->collapse > 1) + { + int first_zero_iter = -1, dummy = -1; + basic_block l2_dom_bb = NULL, dummy_bb = NULL; + + counts = XALLOCAVEC (tree, fd->collapse); + expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, + fin_bb, first_zero_iter, + dummy_bb, dummy, l2_dom_bb); + t = NULL_TREE; + } + else + t = integer_one_node; + + step = fd->loop.step; + tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n1 = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + n2 = OMP_CLAUSE_DECL (innerc); + if (bias) + { + n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); + n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); + } + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + + tree startvar = fd->loop.v; + tree endvar = NULL_TREE; + + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + tree clauses = gimple_omp_for_clauses (inner_stmt); + tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + startvar = OMP_CLAUSE_DECL (innerc); + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + endvar = OMP_CLAUSE_DECL (innerc); + } + t = fold_convert (TREE_TYPE (startvar), n1); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, false, GSI_CONTINUE_LINKING); + gimple *assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + + t = fold_convert (TREE_TYPE (startvar), n2); + e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, + false, GSI_CONTINUE_LINKING); + if (endvar) + { + assign_stmt = gimple_build_assign (endvar, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) + assign_stmt = gimple_build_assign (fd->loop.v, e); + else + assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); + gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + } + if (fd->collapse > 
1) + expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); + + if (!broken_loop) + { + /* The code controlling the sequential loop replaces the + GIMPLE_OMP_CONTINUE. */ + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont_stmt); + vback = gimple_omp_continue_control_def (cont_stmt); + + if (!gimple_omp_for_combined_p (fd->for_stmt)) + { + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (vmain, step); + else + t = fold_build2 (PLUS_EXPR, type, vmain, step); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (vback) + && TREE_ADDRESSABLE (vback), + NULL_TREE, true, GSI_SAME_STMT); + assign_stmt = gimple_build_assign (vback, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + t = build2 (fd->loop.cond_code, boolean_type_node, + DECL_P (vback) && TREE_ADDRESSABLE (vback) + ? t : vback, e); + gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); + } + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi_remove (&gsi, true); + + if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) + collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); + } + + /* Remove the GIMPLE_OMP_FOR statement. */ + gsi = gsi_for_stmt (fd->for_stmt); + gsi_remove (&gsi, true); + + /* Remove the GIMPLE_OMP_RETURN statement. */ + gsi = gsi_last_bb (exit_bb); + gsi_remove (&gsi, true); + + FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; + if (!broken_loop) + remove_edge (BRANCH_EDGE (entry_bb)); + else + { + remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); + region->outer->cont = NULL; + } + + /* Connect all the blocks. */ + if (!broken_loop) + { + ep = find_edge (cont_bb, body_bb); + if (gimple_omp_for_combined_p (fd->for_stmt)) + { + remove_edge (ep); + ep = NULL; + } + else if (fd->collapse > 1) + { + remove_edge (ep); + ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); + } + else + ep->flags = EDGE_TRUE_VALUE; + find_edge (cont_bb, fin_bb)->flags + = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; + } + + set_immediate_dominator (CDI_DOMINATORS, body_bb, + recompute_dominator (CDI_DOMINATORS, body_bb)); + if (!broken_loop) + set_immediate_dominator (CDI_DOMINATORS, fin_bb, + recompute_dominator (CDI_DOMINATORS, fin_bb)); + + if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) + { + struct loop *loop = alloc_loop (); + loop->header = body_bb; + if (collapse_bb == NULL) + loop->latch = cont_bb; + add_loop (loop, body_bb->loop_father); + } +} + +/* A subroutine of expand_omp_for. Generate code for an OpenACC + partitioned loop. The lowering here is abstracted, in that the + loop parameters are passed through internal functions, which are + further lowered by oacc_device_lower, once we get to the target + compiler. The loop is of the form: + + for (V = B; V LTGT E; V += S) {BODY} + + where LTGT is < or >. We may have a specified chunking size, CHUNKING + (constant 0 for no chunking) and we will have a GWV partitioning + mask, specifying dimensions over which the loop is to be + partitioned (see note below). We generate code that looks like: + + <entry_bb> [incoming FALL->body, BRANCH->exit] + typedef signedintify (typeof (V)) T; // underlying signed integral type + T range = E - B; + T chunk_no = 0; + T DIR = LTGT == '<' ? 
+1 : -1; + T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); + T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); + + <head_bb> [created by splitting end of entry_bb] + T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); + T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); + if (!(offset LTGT bound)) goto bottom_bb; + + <body_bb> [incoming] + V = B + offset; + {BODY} + + <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] + offset += step; + if (offset LTGT bound) goto body_bb; [*] + + <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb + chunk_no++; + if (chunk < chunk_max) goto head_bb; + + <exit_bb> [incoming] + V = B + ((range -/+ 1) / S +/- 1) * S [*] + + [*] Needed if V live at end of loop + + Note: CHUNKING & GWV mask are specified explicitly here. This is a + transition, and will be specified by a more general mechanism shortly. + */ + +static void +expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) +{ + tree v = fd->loop.v; + enum tree_code cond_code = fd->loop.cond_code; + enum tree_code plus_code = PLUS_EXPR; + + tree chunk_size = integer_minus_one_node; + tree gwv = integer_zero_node; + tree iter_type = TREE_TYPE (v); + tree diff_type = iter_type; + tree plus_type = iter_type; + struct oacc_collapse *counts = NULL; + + gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) + == GF_OMP_FOR_KIND_OACC_LOOP); + gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); + gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); + + if (POINTER_TYPE_P (iter_type)) + { + plus_code = POINTER_PLUS_EXPR; + plus_type = sizetype; + } + if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) + diff_type = signed_type_for (diff_type); + + basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ + basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ + basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ + basic_block bottom_bb = NULL; + + /* entry_bb has two sucessors; the branch edge is to the exit + block, fallthrough edge to body. */ + gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 + && BRANCH_EDGE (entry_bb)->dest == exit_bb); + + /* If cont_bb non-NULL, it has 2 successors. The branch successor is + body_bb, or to a block whose only successor is the body_bb. Its + fallthrough successor is the final block (same as the branch + successor of the entry_bb). */ + if (cont_bb) + { + basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; + basic_block bed = BRANCH_EDGE (cont_bb)->dest; + + gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); + gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); + } + else + gcc_assert (!gimple_in_ssa_p (cfun)); + + /* The exit block only has entry_bb and cont_bb as predecessors. */ + gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); + + tree chunk_no; + tree chunk_max = NULL_TREE; + tree bound, offset; + tree step = create_tmp_var (diff_type, ".step"); + bool up = cond_code == LT_EXPR; + tree dir = build_int_cst (diff_type, up ? +1 : -1); + bool chunking = !gimple_in_ssa_p (cfun);; + bool negating; + + /* SSA instances. */ + tree offset_incr = NULL_TREE; + tree offset_init = NULL_TREE; + + gimple_stmt_iterator gsi; + gassign *ass; + gcall *call; + gimple *stmt; + tree expr; + location_t loc; + edge split, be, fte; + + /* Split the end of entry_bb to create head_bb. 
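   As for the GWV partitioning mask described in the header comment: it
   records which OpenACC dimensions share this loop's iterations, so for
   instance a loop written as

     #pragma acc parallel loop gang vector
     for (int i = 0; i < n; i++)
       a[i] += b[i];

   is spread over both the gang and the vector dimension, and the mask has
   both of those dimension bits set.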
*/ + split = split_block (entry_bb, last_stmt (entry_bb)); + basic_block head_bb = split->dest; + entry_bb = split->src; + + /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ + gsi = gsi_last_bb (entry_bb); + gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); + loc = gimple_location (for_stmt); + + if (gimple_in_ssa_p (cfun)) + { + offset_init = gimple_omp_for_index (for_stmt, 0); + gcc_assert (integer_zerop (fd->loop.n1)); + /* The SSA parallelizer does gang parallelism. */ + gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); + } + + if (fd->collapse > 1) + { + counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); + tree total = expand_oacc_collapse_init (fd, &gsi, counts, + TREE_TYPE (fd->loop.n2)); + + if (SSA_VAR_P (fd->loop.n2)) + { + total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (fd->loop.n2, total); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + } + + } + + tree b = fd->loop.n1; + tree e = fd->loop.n2; + tree s = fd->loop.step; + + b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); + e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); + + /* Convert the step, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); + if (negating) + s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); + s = fold_convert (diff_type, s); + if (negating) + s = fold_build1 (NEGATE_EXPR, diff_type, s); + s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); + + if (!chunking) + chunk_size = integer_zero_node; + expr = fold_convert (diff_type, chunk_size); + chunk_size = force_gimple_operand_gsi (&gsi, expr, true, + NULL_TREE, true, GSI_SAME_STMT); + /* Determine the range, avoiding possible unsigned->signed overflow. */ + negating = !up && TYPE_UNSIGNED (iter_type); + expr = fold_build2 (MINUS_EXPR, plus_type, + fold_convert (plus_type, negating ? b : e), + fold_convert (plus_type, negating ? e : b)); + expr = fold_convert (diff_type, expr); + if (negating) + expr = fold_build1 (NEGATE_EXPR, diff_type, expr); + tree range = force_gimple_operand_gsi (&gsi, expr, true, + NULL_TREE, true, GSI_SAME_STMT); + + chunk_no = build_int_cst (diff_type, 0); + if (chunking) + { + gcc_assert (!gimple_in_ssa_p (cfun)); + + expr = chunk_no; + chunk_max = create_tmp_var (diff_type, ".chunk_max"); + chunk_no = create_tmp_var (diff_type, ".chunk_no"); + + ass = gimple_build_assign (chunk_no, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_CHUNKS), + dir, range, s, chunk_size, gwv); + gimple_call_set_lhs (call, chunk_max); + gimple_set_location (call, loc); + gsi_insert_before (&gsi, call, GSI_SAME_STMT); + } + else + chunk_size = chunk_no; + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_STEP), + dir, range, s, chunk_size, gwv); + gimple_call_set_lhs (call, step); + gimple_set_location (call, loc); + gsi_insert_before (&gsi, call, GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_FOR. 
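   The negation dance in the step and range computations above exists
   because, for a downward loop over an unsigned type, E - B would wrap;
   instead B - E (which is in range) is computed, converted to the signed
   diff type and only then negated, roughly

     range = -(long) (b - e);   /* rather than (long) (e - b) */

   with long standing in for diff_type.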
*/ + gsi_remove (&gsi, true); + + /* Fixup edges from head_bb */ + be = BRANCH_EDGE (head_bb); + fte = FALLTHRU_EDGE (head_bb); + be->flags |= EDGE_FALSE_VALUE; + fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; + + basic_block body_bb = fte->dest; + + if (gimple_in_ssa_p (cfun)) + { + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + + offset = gimple_omp_continue_control_use (cont_stmt); + offset_incr = gimple_omp_continue_control_def (cont_stmt); + } + else + { + offset = create_tmp_var (diff_type, ".offset"); + offset_init = offset_incr = offset; + } + bound = create_tmp_var (TREE_TYPE (offset), ".bound"); + + /* Loop offset & bound go into head_bb. */ + gsi = gsi_start_bb (head_bb); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_OFFSET), + dir, range, s, + chunk_size, gwv, chunk_no); + gimple_call_set_lhs (call, offset_init); + gimple_set_location (call, loc); + gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); + + call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, + build_int_cst (integer_type_node, + IFN_GOACC_LOOP_BOUND), + dir, range, s, + chunk_size, gwv, offset_init); + gimple_call_set_lhs (call, bound); + gimple_set_location (call, loc); + gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); + + expr = build2 (cond_code, boolean_type_node, offset_init, bound); + gsi_insert_after (&gsi, gimple_build_cond_empty (expr), + GSI_CONTINUE_LINKING); + + /* V assignment goes into body_bb. */ + if (!gimple_in_ssa_p (cfun)) + { + gsi = gsi_start_bb (body_bb); + + expr = build2 (plus_code, iter_type, b, + fold_convert (plus_type, offset)); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (v, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + if (fd->collapse > 1) + expand_oacc_collapse_vars (fd, &gsi, counts, v); + } + + /* Loop increment goes into cont_bb. If this is not a loop, we + will have spawned threads as if it was, and each one will + execute one iteration. The specification is not explicit about + whether such constructs are ill-formed or not, and they can + occur, especially when noreturn routines are involved. */ + if (cont_bb) + { + gsi = gsi_last_bb (cont_bb); + gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); + loc = gimple_location (cont_stmt); + + /* Increment offset. */ + if (gimple_in_ssa_p (cfun)) + expr= build2 (plus_code, iter_type, offset, + fold_convert (plus_type, step)); + else + expr = build2 (PLUS_EXPR, diff_type, offset, step); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (offset_incr, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + expr = build2 (cond_code, boolean_type_node, offset_incr, bound); + gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); + + /* Remove the GIMPLE_OMP_CONTINUE. */ + gsi_remove (&gsi, true); + + /* Fixup edges from cont_bb */ + be = BRANCH_EDGE (cont_bb); + fte = FALLTHRU_EDGE (cont_bb); + be->flags |= EDGE_TRUE_VALUE; + fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; + + if (chunking) + { + /* Split the beginning of exit_bb to make bottom_bb. We + need to insert a nop at the start, because splitting is + after a stmt, not before. 
*/ + gsi = gsi_start_bb (exit_bb); + stmt = gimple_build_nop (); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + split = split_block (exit_bb, stmt); + bottom_bb = split->src; + exit_bb = split->dest; + gsi = gsi_last_bb (bottom_bb); + + /* Chunk increment and test goes into bottom_bb. */ + expr = build2 (PLUS_EXPR, diff_type, chunk_no, + build_int_cst (diff_type, 1)); + ass = gimple_build_assign (chunk_no, expr); + gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); + + /* Chunk test at end of bottom_bb. */ + expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); + gsi_insert_after (&gsi, gimple_build_cond_empty (expr), + GSI_CONTINUE_LINKING); + + /* Fixup edges from bottom_bb. */ + split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; + make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); + } + } + + gsi = gsi_last_bb (exit_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + loc = gimple_location (gsi_stmt (gsi)); + + if (!gimple_in_ssa_p (cfun)) + { + /* Insert the final value of V, in case it is live. This is the + value for the only thread that survives past the join. */ + expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); + expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); + expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); + expr = fold_build2 (MULT_EXPR, diff_type, expr, s); + expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); + expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, + true, GSI_SAME_STMT); + ass = gimple_build_assign (v, expr); + gsi_insert_before (&gsi, ass, GSI_SAME_STMT); + } + + /* Remove the OMP_RETURN. */ + gsi_remove (&gsi, true); + + if (cont_bb) + { + /* We now have one or two nested loops. Update the loop + structures. */ + struct loop *parent = entry_bb->loop_father; + struct loop *body = body_bb->loop_father; + + if (chunking) + { + struct loop *chunk_loop = alloc_loop (); + chunk_loop->header = head_bb; + chunk_loop->latch = bottom_bb; + add_loop (chunk_loop, parent); + parent = chunk_loop; + } + else if (parent != body) + { + gcc_assert (body->header == body_bb); + gcc_assert (body->latch == cont_bb + || single_pred (body->latch) == cont_bb); + parent = NULL; + } + + if (parent) + { + struct loop *body_loop = alloc_loop (); + body_loop->header = body_bb; + body_loop->latch = cont_bb; + add_loop (body_loop, parent); + } + } +} + +/* Expand the OMP loop defined by REGION. */ + +static void +expand_omp_for (struct omp_region *region, gimple *inner_stmt) +{ + struct omp_for_data fd; + struct omp_for_data_loop *loops; + + loops + = (struct omp_for_data_loop *) + alloca (gimple_omp_for_collapse (last_stmt (region->entry)) + * sizeof (struct omp_for_data_loop)); + omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)), + &fd, loops); + region->sched_kind = fd.sched_kind; + region->sched_modifiers = fd.sched_modifiers; + + gcc_assert (EDGE_COUNT (region->entry->succs) == 2); + BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; + if (region->cont) + { + gcc_assert (EDGE_COUNT (region->cont->succs) == 2); + BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; + } + else + /* If there isn't a continue then this is a degerate case where + the introduction of abnormal edges during lowering will prevent + original loops from being detected. Fix that up. 
*/ + loops_state_set (LOOPS_NEED_FIXUP); + + if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) + expand_omp_simd (region, &fd); + else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR) + expand_cilk_for (region, &fd); + else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) + { + gcc_assert (!inner_stmt); + expand_oacc_for (region, &fd); + } + else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) + { + if (gimple_omp_for_combined_into_p (fd.for_stmt)) + expand_omp_taskloop_for_inner (region, &fd, inner_stmt); + else + expand_omp_taskloop_for_outer (region, &fd, inner_stmt); + } + else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC + && !fd.have_ordered) + { + if (fd.chunk_size == NULL) + expand_omp_for_static_nochunk (region, &fd, inner_stmt); + else + expand_omp_for_static_chunk (region, &fd, inner_stmt); + } + else + { + int fn_index, start_ix, next_ix; + + gcc_assert (gimple_omp_for_kind (fd.for_stmt) + == GF_OMP_FOR_KIND_FOR); + if (fd.chunk_size == NULL + && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) + fd.chunk_size = integer_zero_node; + gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); + switch (fd.sched_kind) + { + case OMP_CLAUSE_SCHEDULE_RUNTIME: + fn_index = 3; + break; + case OMP_CLAUSE_SCHEDULE_DYNAMIC: + case OMP_CLAUSE_SCHEDULE_GUIDED: + if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) + && !fd.ordered + && !fd.have_ordered) + { + fn_index = 3 + fd.sched_kind; + break; + } + /* FALLTHRU */ + default: + fn_index = fd.sched_kind; + break; + } + if (!fd.ordered) + fn_index += fd.have_ordered * 6; + if (fd.ordered) + start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; + else + start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; + next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; + if (fd.iter_type == long_long_unsigned_type_node) + { + start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START + - (int)BUILT_IN_GOMP_LOOP_STATIC_START); + next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT + - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); + } + expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, + (enum built_in_function) next_ix, inner_stmt); + } + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_only_virtuals); +} + +/* Expand code for an OpenMP sections directive. In pseudo code, we generate + + v = GOMP_sections_start (n); + L0: + switch (v) + { + case 0: + goto L2; + case 1: + section 1; + goto L1; + case 2: + ... + case n: + ... + default: + abort (); + } + L1: + v = GOMP_sections_next (); + goto L0; + L2: + reduction; + + If this is a combined parallel sections, replace the call to + GOMP_sections_start with call to GOMP_sections_next. */ + +static void +expand_omp_sections (struct omp_region *region) +{ + tree t, u, vin = NULL, vmain, vnext, l2; + unsigned len; + basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; + gimple_stmt_iterator si, switch_si; + gomp_sections *sections_stmt; + gimple *stmt; + gomp_continue *cont; + edge_iterator ei; + edge e; + struct omp_region *inner; + unsigned i, casei; + bool exit_reachable = region->cont != NULL; + + gcc_assert (region->exit != NULL); + entry_bb = region->entry; + l0_bb = single_succ (entry_bb); + l1_bb = region->cont; + l2_bb = region->exit; + if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) + l2 = gimple_block_label (l2_bb); + else + { + /* This can happen if there are reductions. 
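   e.g. for

     #pragma omp sections reduction (+:sum)
     {
     #pragma omp section
       sum += f ();
     #pragma omp section
       sum += g ();
     }

   the end-of-sections path has to run the code combining the per-thread
   partial sums before reaching the exit, so the label for the "case 0"
   branch is found by scanning the successors below instead of being taken
   directly from the exit block.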
*/ + len = EDGE_COUNT (l0_bb->succs); + gcc_assert (len > 0); + e = EDGE_SUCC (l0_bb, len - 1); + si = gsi_last_bb (e->dest); + l2 = NULL_TREE; + if (gsi_end_p (si) + || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) + l2 = gimple_block_label (e->dest); + else + FOR_EACH_EDGE (e, ei, l0_bb->succs) + { + si = gsi_last_bb (e->dest); + if (gsi_end_p (si) + || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) + { + l2 = gimple_block_label (e->dest); + break; + } + } + } + if (exit_reachable) + default_bb = create_empty_bb (l1_bb->prev_bb); + else + default_bb = create_empty_bb (l0_bb); + + /* We will build a switch() with enough cases for all the + GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work + and a default case to abort if something goes wrong. */ + len = EDGE_COUNT (l0_bb->succs); + + /* Use vec::quick_push on label_vec throughout, since we know the size + in advance. */ + auto_vec<tree> label_vec (len); + + /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the + GIMPLE_OMP_SECTIONS statement. */ + si = gsi_last_bb (entry_bb); + sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); + gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); + vin = gimple_omp_sections_control (sections_stmt); + if (!is_combined_parallel (region)) + { + /* If we are not inside a combined parallel+sections region, + call GOMP_sections_start. */ + t = build_int_cst (unsigned_type_node, len - 1); + u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); + stmt = gimple_build_call (u, 1, t); + } + else + { + /* Otherwise, call GOMP_sections_next. */ + u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); + stmt = gimple_build_call (u, 0); + } + gimple_call_set_lhs (stmt, vin); + gsi_insert_after (&si, stmt, GSI_SAME_STMT); + gsi_remove (&si, true); + + /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in + L0_BB. */ + switch_si = gsi_last_bb (l0_bb); + gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); + if (exit_reachable) + { + cont = as_a <gomp_continue *> (last_stmt (l1_bb)); + gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); + vmain = gimple_omp_continue_control_use (cont); + vnext = gimple_omp_continue_control_def (cont); + } + else + { + vmain = vin; + vnext = NULL_TREE; + } + + t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); + label_vec.quick_push (t); + i = 1; + + /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ + for (inner = region->inner, casei = 1; + inner; + inner = inner->next, i++, casei++) + { + basic_block s_entry_bb, s_exit_bb; + + /* Skip optional reduction region. */ + if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) + { + --i; + --casei; + continue; + } + + s_entry_bb = inner->entry; + s_exit_bb = inner->exit; + + t = gimple_block_label (s_entry_bb); + u = build_int_cst (unsigned_type_node, casei); + u = build_case_label (u, NULL, t); + label_vec.quick_push (u); + + si = gsi_last_bb (s_entry_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); + gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); + gsi_remove (&si, true); + single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; + + if (s_exit_bb == NULL) + continue; + + si = gsi_last_bb (s_exit_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); + gsi_remove (&si, true); + + single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; + } + + /* Error handling code goes in DEFAULT_BB. 
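+ It consists of a single call to __builtin_trap, reached only if the switch is handed a section number that should never occur.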
*/ + t = gimple_block_label (default_bb); + u = build_case_label (NULL, NULL, t); + make_edge (l0_bb, default_bb, 0); + add_bb_to_loop (default_bb, current_loops->tree_root); + + stmt = gimple_build_switch (vmain, u, label_vec); + gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); + gsi_remove (&switch_si, true); + + si = gsi_start_bb (default_bb); + stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); + gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); + + if (exit_reachable) + { + tree bfn_decl; + + /* Code to get the next section goes in L1_BB. */ + si = gsi_last_bb (l1_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); + + bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); + stmt = gimple_build_call (bfn_decl, 0); + gimple_call_set_lhs (stmt, vnext); + gsi_insert_after (&si, stmt, GSI_SAME_STMT); + gsi_remove (&si, true); + + single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; + } + + /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */ + si = gsi_last_bb (l2_bb); + if (gimple_omp_return_nowait_p (gsi_stmt (si))) + t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT); + else if (gimple_omp_return_lhs (gsi_stmt (si))) + t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL); + else + t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END); + stmt = gimple_build_call (t, 0); + if (gimple_omp_return_lhs (gsi_stmt (si))) + gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si))); + gsi_insert_after (&si, stmt, GSI_SAME_STMT); + gsi_remove (&si, true); + + set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); +} + +/* Expand code for an OpenMP single directive. We've already expanded + much of the code, here we simply place the GOMP_barrier call. */ + +static void +expand_omp_single (struct omp_region *region) +{ + basic_block entry_bb, exit_bb; + gimple_stmt_iterator si; + + entry_bb = region->entry; + exit_bb = region->exit; + + si = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); + gsi_remove (&si, true); + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + si = gsi_last_bb (exit_bb); + if (!gimple_omp_return_nowait_p (gsi_stmt (si))) + { + tree t = gimple_omp_return_lhs (gsi_stmt (si)); + gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT); + } + gsi_remove (&si, true); + single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; +} + +/* Generic expansion for OpenMP synchronization directives: master, + ordered and critical. All we need to do here is remove the entry + and exit markers for REGION. */ + +static void +expand_omp_synch (struct omp_region *region) +{ + basic_block entry_bb, exit_bb; + gimple_stmt_iterator si; + + entry_bb = region->entry; + exit_bb = region->exit; + + si = gsi_last_bb (entry_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE + || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER + || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP + || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED + || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL + || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); + gsi_remove (&si, true); + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + if (exit_bb) + { + si = gsi_last_bb (exit_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); + gsi_remove (&si, true); + single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; + } +} + +/* A subroutine of expand_omp_atomic. Attempt to implement the atomic + operation as a normal volatile load. 
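+ For instance, '#pragma omp atomic read v = x;' becomes roughly v = __atomic_load_N (&x, MEMMODEL_RELAXED) (or MEMMODEL_SEQ_CST when the seq_cst clause is present); we return false if the target lacks the builtin, so the caller can fall back to another expansion.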
*/ + +static bool +expand_omp_atomic_load (basic_block load_bb, tree addr, + tree loaded_val, int index) +{ + enum built_in_function tmpbase; + gimple_stmt_iterator gsi; + basic_block store_bb; + location_t loc; + gimple *stmt; + tree decl, call, type, itype; + + gsi = gsi_last_bb (load_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); + loc = gimple_location (stmt); + + /* ??? If the target does not implement atomic_load_optab[mode], and mode + is smaller than word size, then expand_atomic_load assumes that the load + is atomic. We could avoid the builtin entirely in this case. */ + + tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); + decl = builtin_decl_explicit (tmpbase); + if (decl == NULL_TREE) + return false; + + type = TREE_TYPE (loaded_val); + itype = TREE_TYPE (TREE_TYPE (decl)); + + call = build_call_expr_loc (loc, decl, 2, addr, + build_int_cst (NULL, + gimple_omp_atomic_seq_cst_p (stmt) + ? MEMMODEL_SEQ_CST + : MEMMODEL_RELAXED)); + if (!useless_type_conversion_p (type, itype)) + call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); + call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); + + force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + store_bb = single_succ (load_bb); + gsi = gsi_last_bb (store_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); + gsi_remove (&gsi, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + + return true; +} + +/* A subroutine of expand_omp_atomic. Attempt to implement the atomic + operation as a normal volatile store. */ + +static bool +expand_omp_atomic_store (basic_block load_bb, tree addr, + tree loaded_val, tree stored_val, int index) +{ + enum built_in_function tmpbase; + gimple_stmt_iterator gsi; + basic_block store_bb = single_succ (load_bb); + location_t loc; + gimple *stmt; + tree decl, call, type, itype; + machine_mode imode; + bool exchange; + + gsi = gsi_last_bb (load_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); + + /* If the load value is needed, then this isn't a store but an exchange. */ + exchange = gimple_omp_atomic_need_value_p (stmt); + + gsi = gsi_last_bb (store_bb); + stmt = gsi_stmt (gsi); + gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE); + loc = gimple_location (stmt); + + /* ??? If the target does not implement atomic_store_optab[mode], and mode + is smaller than word size, then expand_atomic_store assumes that the store + is atomic. We could avoid the builtin entirely in this case. */ + + tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N); + tmpbase = (enum built_in_function) ((int) tmpbase + index + 1); + decl = builtin_decl_explicit (tmpbase); + if (decl == NULL_TREE) + return false; + + type = TREE_TYPE (stored_val); + + /* Dig out the type of the function's second argument. */ + itype = TREE_TYPE (decl); + itype = TYPE_ARG_TYPES (itype); + itype = TREE_CHAIN (itype); + itype = TREE_VALUE (itype); + imode = TYPE_MODE (itype); + + if (exchange && !can_atomic_exchange_p (imode, true)) + return false; + + if (!useless_type_conversion_p (itype, type)) + stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); + call = build_call_expr_loc (loc, decl, 3, addr, stored_val, + build_int_cst (NULL, + gimple_omp_atomic_seq_cst_p (stmt) + ? 
MEMMODEL_SEQ_CST + : MEMMODEL_RELAXED)); + if (exchange) + { + if (!useless_type_conversion_p (type, itype)) + call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); + call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); + } + + force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */ + gsi = gsi_last_bb (load_bb); + gsi_remove (&gsi, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + + return true; +} + +/* A subroutine of expand_omp_atomic. Attempt to implement the atomic + operation as a __atomic_fetch_op builtin. INDEX is log2 of the + size of the data type, and thus usable to find the index of the builtin + decl. Returns false if the expression is not of the proper form. */ + +static bool +expand_omp_atomic_fetch_op (basic_block load_bb, + tree addr, tree loaded_val, + tree stored_val, int index) +{ + enum built_in_function oldbase, newbase, tmpbase; + tree decl, itype, call; + tree lhs, rhs; + basic_block store_bb = single_succ (load_bb); + gimple_stmt_iterator gsi; + gimple *stmt; + location_t loc; + enum tree_code code; + bool need_old, need_new; + machine_mode imode; + bool seq_cst; + + /* We expect to find the following sequences: + + load_bb: + GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) + + store_bb: + val = tmp OP something; (or: something OP tmp) + GIMPLE_OMP_STORE (val) + + ???FIXME: Allow a more flexible sequence. + Perhaps use data flow to pick the statements. + + */ + + gsi = gsi_after_labels (store_bb); + stmt = gsi_stmt (gsi); + loc = gimple_location (stmt); + if (!is_gimple_assign (stmt)) + return false; + gsi_next (&gsi); + if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) + return false; + need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); + need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); + seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb)); + gcc_checking_assert (!need_old || !need_new); + + if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) + return false; + + /* Check for one of the supported fetch-op operations. */ + code = gimple_assign_rhs_code (stmt); + switch (code) + { + case PLUS_EXPR: + case POINTER_PLUS_EXPR: + oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N; + newbase = BUILT_IN_ATOMIC_ADD_FETCH_N; + break; + case MINUS_EXPR: + oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N; + newbase = BUILT_IN_ATOMIC_SUB_FETCH_N; + break; + case BIT_AND_EXPR: + oldbase = BUILT_IN_ATOMIC_FETCH_AND_N; + newbase = BUILT_IN_ATOMIC_AND_FETCH_N; + break; + case BIT_IOR_EXPR: + oldbase = BUILT_IN_ATOMIC_FETCH_OR_N; + newbase = BUILT_IN_ATOMIC_OR_FETCH_N; + break; + case BIT_XOR_EXPR: + oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N; + newbase = BUILT_IN_ATOMIC_XOR_FETCH_N; + break; + default: + return false; + } + + /* Make sure the expression is of the proper form. */ + if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0)) + rhs = gimple_assign_rhs2 (stmt); + else if (commutative_tree_code (gimple_assign_rhs_code (stmt)) + && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0)) + rhs = gimple_assign_rhs1 (stmt); + else + return false; + + tmpbase = ((enum built_in_function) + ((need_new ? 
newbase : oldbase) + index + 1)); + decl = builtin_decl_explicit (tmpbase); + if (decl == NULL_TREE) + return false; + itype = TREE_TYPE (TREE_TYPE (decl)); + imode = TYPE_MODE (itype); + + /* We could test all of the various optabs involved, but the fact of the + matter is that (with the exception of i486 vs i586 and xadd) all targets + that support any atomic operation optab also implement compare-and-swap. + Let optabs.c take care of expanding any compare-and-swap loop. */ + if (!can_compare_and_swap_p (imode, true)) + return false; + + gsi = gsi_last_bb (load_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD); + + /* OpenMP does not imply any barrier-like semantics on its atomic ops. + It only requires that the operation happen atomically. Thus we can + use the RELAXED memory model. */ + call = build_call_expr_loc (loc, decl, 3, addr, + fold_convert_loc (loc, itype, rhs), + build_int_cst (NULL, + seq_cst ? MEMMODEL_SEQ_CST + : MEMMODEL_RELAXED)); + + if (need_old || need_new) + { + lhs = need_old ? loaded_val : stored_val; + call = fold_convert_loc (loc, TREE_TYPE (lhs), call); + call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call); + } + else + call = fold_convert_loc (loc, void_type_node, call); + force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + gsi = gsi_last_bb (store_bb); + gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); + gsi_remove (&gsi, true); + gsi = gsi_last_bb (store_bb); + stmt = gsi_stmt (gsi); + gsi_remove (&gsi, true); + + if (gimple_in_ssa_p (cfun)) + { + release_defs (stmt); + update_ssa (TODO_update_ssa_no_phi); + } + + return true; +} + +/* A subroutine of expand_omp_atomic. Implement the atomic operation as: + + oldval = *addr; + repeat: + newval = rhs; // with oldval replacing *addr in rhs + oldval = __sync_val_compare_and_swap (addr, oldval, newval); + if (oldval != newval) + goto repeat; + + INDEX is log2 of the size of the data type, and thus usable to find the + index of the builtin decl. */ + +static bool +expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, + tree addr, tree loaded_val, tree stored_val, + int index) +{ + tree loadedi, storedi, initial, new_storedi, old_vali; + tree type, itype, cmpxchg, iaddr; + gimple_stmt_iterator si; + basic_block loop_header = single_succ (load_bb); + gimple *phi, *stmt; + edge e; + enum built_in_function fncode; + + /* ??? We need a non-pointer interface to __atomic_compare_exchange in + order to use the RELAXED memory model effectively. */ + fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N + + index + 1); + cmpxchg = builtin_decl_explicit (fncode); + if (cmpxchg == NULL_TREE) + return false; + type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); + itype = TREE_TYPE (TREE_TYPE (cmpxchg)); + + if (!can_compare_and_swap_p (TYPE_MODE (itype), true)) + return false; + + /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ + si = gsi_last_bb (load_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); + + /* For floating-point values, we'll need to view-convert them to integers + so that we can perform the atomic compare and swap. Simplify the + following code by always setting up the "i"ntegral variables.
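+ When the type is already integral or a pointer, IADDR and LOADEDI simply alias ADDR and LOADED_VAL; otherwise they refer to a same-sized integer view of the object.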
*/ + if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) + { + tree iaddr_val; + + iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, + true)); + iaddr_val + = force_gimple_operand_gsi (&si, + fold_convert (TREE_TYPE (iaddr), addr), + false, NULL_TREE, true, GSI_SAME_STMT); + stmt = gimple_build_assign (iaddr, iaddr_val); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + loadedi = create_tmp_var (itype); + if (gimple_in_ssa_p (cfun)) + loadedi = make_ssa_name (loadedi); + } + else + { + iaddr = addr; + loadedi = loaded_val; + } + + fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); + tree loaddecl = builtin_decl_explicit (fncode); + if (loaddecl) + initial + = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)), + build_call_expr (loaddecl, 2, iaddr, + build_int_cst (NULL_TREE, + MEMMODEL_RELAXED))); + else + initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr, + build_int_cst (TREE_TYPE (iaddr), 0)); + + initial + = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, + GSI_SAME_STMT); + + /* Move the value to the LOADEDI temporary. */ + if (gimple_in_ssa_p (cfun)) + { + gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); + phi = create_phi_node (loadedi, loop_header); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), + initial); + } + else + gsi_insert_before (&si, + gimple_build_assign (loadedi, initial), + GSI_SAME_STMT); + if (loadedi != loaded_val) + { + gimple_stmt_iterator gsi2; + tree x; + + x = build1 (VIEW_CONVERT_EXPR, type, loadedi); + gsi2 = gsi_start_bb (loop_header); + if (gimple_in_ssa_p (cfun)) + { + gassign *stmt; + x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, + true, GSI_SAME_STMT); + stmt = gimple_build_assign (loaded_val, x); + gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); + } + else + { + x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); + force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, + true, GSI_SAME_STMT); + } + } + gsi_remove (&si, true); + + si = gsi_last_bb (store_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); + + if (iaddr == addr) + storedi = stored_val; + else + storedi = + force_gimple_operand_gsi (&si, + build1 (VIEW_CONVERT_EXPR, itype, + stored_val), true, NULL_TREE, true, + GSI_SAME_STMT); + + /* Build the compare&swap statement. */ + new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); + new_storedi = force_gimple_operand_gsi (&si, + fold_convert (TREE_TYPE (loadedi), + new_storedi), + true, NULL_TREE, + true, GSI_SAME_STMT); + + if (gimple_in_ssa_p (cfun)) + old_vali = loadedi; + else + { + old_vali = create_tmp_var (TREE_TYPE (loadedi)); + stmt = gimple_build_assign (old_vali, loadedi); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + + stmt = gimple_build_assign (loadedi, new_storedi); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + } + + /* Note that we always perform the comparison as an integer, even for + floating point. This allows the atomic operation to properly + succeed even with NaNs and -0.0. */ + stmt = gimple_build_cond_empty + (build2 (NE_EXPR, boolean_type_node, + new_storedi, old_vali)); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + + /* Update cfg. */ + e = single_succ_edge (store_bb); + e->flags &= ~EDGE_FALLTHRU; + e->flags |= EDGE_FALSE_VALUE; + + e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); + + /* Copy the new value to loadedi (we already did that before the condition + if we are not in SSA). 
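+ In SSA form this amounts to making NEW_STOREDI the PHI argument on the back edge we have just created.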
*/ + if (gimple_in_ssa_p (cfun)) + { + phi = gimple_seq_first_stmt (phi_nodes (loop_header)); + SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); + } + + /* Remove GIMPLE_OMP_ATOMIC_STORE. */ + gsi_remove (&si, true); + + struct loop *loop = alloc_loop (); + loop->header = loop_header; + loop->latch = store_bb; + add_loop (loop, loop_header->loop_father); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + + return true; +} + +/* A subroutine of expand_omp_atomic. Implement the atomic operation as: + + GOMP_atomic_start (); + *addr = rhs; + GOMP_atomic_end (); + + The result is not globally atomic, but works so long as all parallel + references are within #pragma omp atomic directives. According to + responses received from omp@openmp.org, this appears to be within spec, + which makes sense, since that's how several other compilers handle + this situation as well. + LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're + expanding. STORED_VAL is the operand of the matching + GIMPLE_OMP_ATOMIC_STORE. + + We replace + GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with + loaded_val = *addr; + + and replace + GIMPLE_OMP_ATOMIC_STORE (stored_val) with + *addr = stored_val; +*/ + +static bool +expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, + tree addr, tree loaded_val, tree stored_val) +{ + gimple_stmt_iterator si; + gassign *stmt; + tree t; + + si = gsi_last_bb (load_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); + + t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); + t = build_call_expr (t, 0); + force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); + + stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr)); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + gsi_remove (&si, true); + + si = gsi_last_bb (store_bb); + gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); + + stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)), + stored_val); + gsi_insert_before (&si, stmt, GSI_SAME_STMT); + + t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); + t = build_call_expr (t, 0); + force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); + gsi_remove (&si, true); + + if (gimple_in_ssa_p (cfun)) + update_ssa (TODO_update_ssa_no_phi); + return true; +} + +/* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand + using expand_omp_atomic_fetch_op. If that fails, we try to + call expand_omp_atomic_pipeline, and if it fails too, the + ultimate fallback is wrapping the operation in a mutex + (expand_omp_atomic_mutex). REGION is the atomic region built + by build_omp_regions_1(). */ + +static void +expand_omp_atomic (struct omp_region *region) +{ + basic_block load_bb = region->entry, store_bb = region->exit; + gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb)); + gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb)); + tree loaded_val = gimple_omp_atomic_load_lhs (load); + tree addr = gimple_omp_atomic_load_rhs (load); + tree stored_val = gimple_omp_atomic_store_val (store); + tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); + HOST_WIDE_INT index; + + /* Make sure the type is one of the supported sizes. */ + index = tree_to_uhwi (TYPE_SIZE_UNIT (type)); + index = exact_log2 (index); + if (index >= 0 && index <= 4) + { + unsigned int align = TYPE_ALIGN_UNIT (type); + + /* __sync builtins require strict data alignment. */ + if (exact_log2 (align) >= index) + { + /* Atomic load.
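+ This is the pure read case, recognized by the stored value being identical to the loaded value (e.g. '#pragma omp atomic read').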
*/ + if (loaded_val == stored_val + && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT + || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) + && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD + && expand_omp_atomic_load (load_bb, addr, loaded_val, index)) + return; + + /* Atomic store. */ + if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT + || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) + && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD + && store_bb == single_succ (load_bb) + && first_stmt (store_bb) == store + && expand_omp_atomic_store (load_bb, addr, loaded_val, + stored_val, index)) + return; + + /* When possible, use specialized atomic update functions. */ + if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) + && store_bb == single_succ (load_bb) + && expand_omp_atomic_fetch_op (load_bb, addr, + loaded_val, stored_val, index)) + return; + + /* If we don't have specialized __sync builtins, try and implement + as a compare and swap loop. */ + if (expand_omp_atomic_pipeline (load_bb, store_bb, addr, + loaded_val, stored_val, index)) + return; + } + } + + /* The ultimate fallback is wrapping the operation in a mutex. */ + expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); +} + +/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending + at REGION_EXIT. */ + +static void +mark_loops_in_oacc_kernels_region (basic_block region_entry, + basic_block region_exit) +{ + struct loop *outer = region_entry->loop_father; + gcc_assert (region_exit == NULL || outer == region_exit->loop_father); + + /* Don't parallelize the kernels region if it contains more than one outer + loop. */ + unsigned int nr_outer_loops = 0; + struct loop *single_outer = NULL; + for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next) + { + gcc_assert (loop_outer (loop) == outer); + + if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) + continue; + + if (region_exit != NULL + && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit)) + continue; + + nr_outer_loops++; + single_outer = loop; + } + if (nr_outer_loops != 1) + return; + + for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner) + if (loop->next) + return; + + /* Mark the loops in the region. */ + for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner) + loop->in_oacc_kernels_region = true; +} + +/* Types used to pass grid and workgroup sizes to kernel invocation. */ + +struct GTY(()) grid_launch_attributes_trees +{ + tree kernel_dim_array_type; + tree kernel_lattrs_dimnum_decl; + tree kernel_lattrs_grid_decl; + tree kernel_lattrs_group_decl; + tree kernel_launch_attributes_type; +}; + +static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees; + +/* Create types used to pass kernel launch attributes to target.
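+ The record built below has a dimension count followed by two three-element arrays holding the grid and work-group sizes, matching the layout consumed on the libgomp side.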
*/ + +static void +grid_create_kernel_launch_attr_types (void) +{ + if (grid_attr_trees) + return; + grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); + + tree dim_arr_index_type + = build_index_type (build_int_cst (integer_type_node, 2)); + grid_attr_trees->kernel_dim_array_type + = build_array_type (uint32_type_node, dim_arr_index_type); + + grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); + grid_attr_trees->kernel_lattrs_dimnum_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), + uint32_type_node); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; + + grid_attr_trees->kernel_lattrs_grid_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), + grid_attr_trees->kernel_dim_array_type); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) + = grid_attr_trees->kernel_lattrs_dimnum_decl; + grid_attr_trees->kernel_lattrs_group_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), + grid_attr_trees->kernel_dim_array_type); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) + = grid_attr_trees->kernel_lattrs_grid_decl; + finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, + "__gomp_kernel_launch_attributes", + grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); +} + +/* Insert before the current statement in GSI a store of VALUE to INDEX of + array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be + of type uint32_type_node. */ + +static void +grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, + tree fld_decl, int index, tree value) +{ + tree ref = build4 (ARRAY_REF, uint32_type_node, + build3 (COMPONENT_REF, + grid_attr_trees->kernel_dim_array_type, + range_var, fld_decl, NULL_TREE), + build_int_cst (integer_type_node, index), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); +} + +/* Return a tree representation of a pointer to a structure with grid and + work-group size information. Statements filling that information will be + inserted before GSI, TGT_STMT is the target statement which has the + necessary information in it. 
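+ The individual sizes are read from the artificial _griddim_ clauses attached to the target statement.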
*/ + +static tree +grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, + gomp_target *tgt_stmt) +{ + grid_create_kernel_launch_attr_types (); + tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, + "__kernel_launch_attrs"); + + unsigned max_dim = 0; + for (tree clause = gimple_omp_target_clauses (tgt_stmt); + clause; + clause = OMP_CLAUSE_CHAIN (clause)) + { + if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) + continue; + + unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); + max_dim = MAX (dim, max_dim); + + grid_insert_store_range_dim (gsi, lattrs, + grid_attr_trees->kernel_lattrs_grid_decl, + dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); + grid_insert_store_range_dim (gsi, lattrs, + grid_attr_trees->kernel_lattrs_group_decl, + dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); + } + + tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, + grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); + gcc_checking_assert (max_dim <= 2); + tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); + gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), + GSI_SAME_STMT); + TREE_ADDRESSABLE (lattrs) = 1; + return build_fold_addr_expr (lattrs); +} + +/* Build target argument identifier from the DEVICE identifier, value + identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ + +static tree +get_target_argument_identifier_1 (int device, bool subseqent_param, int id) +{ + tree t = build_int_cst (integer_type_node, device); + if (subseqent_param) + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, + GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, id)); + return t; +} + +/* Like above but return it in type that can be directly stored as an element + of the argument array. */ + +static tree +get_target_argument_identifier (int device, bool subseqent_param, int id) +{ + tree t = get_target_argument_identifier_1 (device, subseqent_param, id); + return fold_convert (ptr_type_node, t); +} + +/* Return a target argument consisting of DEVICE identifier, value identifier + ID, and the actual VALUE. */ + +static tree +get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, + tree value) +{ + tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, + fold_convert (integer_type_node, value), + build_int_cst (unsigned_type_node, + GOMP_TARGET_ARG_VALUE_SHIFT)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + get_target_argument_identifier_1 (device, false, id)); + t = fold_convert (ptr_type_node, t); + return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); +} + +/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, + push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, + otherwise push an identifier (with DEVICE and ID) and the VALUE in two + arguments. 
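+ In the single-argument form the value is packed above the identifier bits; e.g. a num_teams of 4 for all devices becomes (4 << GOMP_TARGET_ARG_VALUE_SHIFT) | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS, stored in one pointer-sized slot.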
*/ + +static void +push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, + int id, tree value, vec <tree> *args) +{ + if (tree_fits_shwi_p (value) + && tree_to_shwi (value) > -(1 << 15) + && tree_to_shwi (value) < (1 << 15)) + args->quick_push (get_target_argument_value (gsi, device, id, value)); + else + { + args->quick_push (get_target_argument_identifier (device, true, id)); + value = fold_convert (ptr_type_node, value); + value = force_gimple_operand_gsi (gsi, value, true, NULL, true, + GSI_SAME_STMT); + args->quick_push (value); + } +} + +/* Create an array of arguments that is then passed to GOMP_target. */ + +static tree +get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) +{ + auto_vec <tree, 6> args; + tree clauses = gimple_omp_target_clauses (tgt_stmt); + tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); + if (c) + t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_NUM_TEAMS, t, &args); + + c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); + if (c) + t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_THREAD_LIMIT, t, + &args); + + /* Add HSA-specific grid sizes, if available. */ + if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + { + t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, + GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES); + args.quick_push (t); + args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); + } + + /* Produce more, perhaps device specific, arguments here. */ + + tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, + args.length () + 1), + ".omp_target_args"); + for (unsigned i = 0; i < args.length (); i++) + { + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, i), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), + GSI_SAME_STMT); + } + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, args.length ()), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), + GSI_SAME_STMT); + TREE_ADDRESSABLE (argarray) = 1; + return build_fold_addr_expr (argarray); +} + +/* Expand the GIMPLE_OMP_TARGET starting at REGION. 
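+ For offloaded kinds the region body is moved into its child function and the directive is replaced by a call to the appropriate GOMP_/GOACC_ launch routine; data and update kinds merely emit the corresponding library call.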
*/ + +static void +expand_omp_target (struct omp_region *region) +{ + basic_block entry_bb, exit_bb, new_bb; + struct function *child_cfun; + tree child_fn, block, t; + gimple_stmt_iterator gsi; + gomp_target *entry_stmt; + gimple *stmt; + edge e; + bool offloaded, data_region; + + entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); + new_bb = region->entry; + + offloaded = is_gimple_omp_offloaded (entry_stmt); + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + data_region = false; + break; + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + data_region = true; + break; + default: + gcc_unreachable (); + } + + child_fn = NULL_TREE; + child_cfun = NULL; + if (offloaded) + { + child_fn = gimple_omp_target_child_fn (entry_stmt); + child_cfun = DECL_STRUCT_FUNCTION (child_fn); + } + + /* Supported by expand_omp_taskreg, but not here. */ + if (child_cfun != NULL) + gcc_checking_assert (!child_cfun->cfg); + gcc_checking_assert (!gimple_in_ssa_p (cfun)); + + entry_bb = region->entry; + exit_bb = region->exit; + + if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) + mark_loops_in_oacc_kernels_region (region->entry, region->exit); + + if (offloaded) + { + unsigned srcidx, dstidx, num; + + /* If the offloading region needs data sent from the parent + function, then the very first statement (except possible + tree profile counter updates) of the offloading body + is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since + &.OMP_DATA_O is passed as an argument to the child function, + we need to replace it with the argument as seen by the child + function. + + In most cases, this will end up being the identity assignment + .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had + a function call that has been inlined, the original PARM_DECL + .OMP_DATA_I may have been converted into a different local + variable. In which case, we need to keep the assignment. */ + tree data_arg = gimple_omp_target_data_arg (entry_stmt); + if (data_arg) + { + basic_block entry_succ_bb = single_succ (entry_bb); + gimple_stmt_iterator gsi; + tree arg; + gimple *tgtcopy_stmt = NULL; + tree sender = TREE_VEC_ELT (data_arg, 0); + + for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) + { + gcc_assert (!gsi_end_p (gsi)); + stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + + if (gimple_num_ops (stmt) == 2) + { + tree arg = gimple_assign_rhs1 (stmt); + + /* We're ignoring the subcode because we're + effectively doing a STRIP_NOPS. */ + + if (TREE_CODE (arg) == ADDR_EXPR + && TREE_OPERAND (arg, 0) == sender) + { + tgtcopy_stmt = stmt; + break; + } + } + } + + gcc_assert (tgtcopy_stmt != NULL); + arg = DECL_ARGUMENTS (child_fn); + + gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); + gsi_remove (&gsi, true); + } + + /* Declare local variables needed in CHILD_CFUN. */ + block = DECL_INITIAL (child_fn); + BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); + /* The gimplifier could record temporaries in the offloading block + rather than in containing function's local_decls chain, + which would mean cgraph missed finalizing them. Do it now. 
*/ + for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) + if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) + varpool_node::finalize_decl (t); + DECL_SAVED_TREE (child_fn) = NULL; + /* We'll create a CFG for child_fn, so no gimple body is needed. */ + gimple_set_body (child_fn, NULL); + TREE_USED (block) = 1; + + /* Reset DECL_CONTEXT on function arguments. */ + for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) + DECL_CONTEXT (t) = child_fn; + + /* Split ENTRY_BB at GIMPLE_*, + so that it can be moved to the child function. */ + gsi = gsi_last_bb (entry_bb); + stmt = gsi_stmt (gsi); + gcc_assert (stmt + && gimple_code (stmt) == gimple_code (entry_stmt)); + e = split_block (entry_bb, stmt); + gsi_remove (&gsi, true); + entry_bb = e->dest; + single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; + + /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */ + if (exit_bb) + { + gsi = gsi_last_bb (exit_bb); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + } + + /* Move the offloading region into CHILD_CFUN. */ + + block = gimple_block (entry_stmt); + + new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); + if (exit_bb) + single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; + /* When the OMP expansion process cannot guarantee an up-to-date + loop tree arrange for the child function to fixup loops. */ + if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; + + /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ + num = vec_safe_length (child_cfun->local_decls); + for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) + { + t = (*child_cfun->local_decls)[srcidx]; + if (DECL_CONTEXT (t) == cfun->decl) + continue; + if (srcidx != dstidx) + (*child_cfun->local_decls)[dstidx] = t; + dstidx++; + } + if (dstidx != num) + vec_safe_truncate (child_cfun->local_decls, dstidx); + + /* Inform the callgraph about the new function. */ + child_cfun->curr_properties = cfun->curr_properties; + child_cfun->has_simduid_loops |= cfun->has_simduid_loops; + child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; + cgraph_node *node = cgraph_node::get_create (child_fn); + node->parallelized_function = 1; + cgraph_node::add_new_function (child_fn, true); + + /* Add the new function to the offload table. */ + if (ENABLE_OFFLOADING) + vec_safe_push (offload_funcs, child_fn); + + bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) + && !DECL_ASSEMBLER_NAME_SET_P (child_fn); + + /* Fix the callgraph edges for child_cfun. Those for cfun will be + fixed in a following pass. */ + push_cfun (child_cfun); + if (need_asm) + assign_assembler_name_if_neeeded (child_fn); + cgraph_edge::rebuild_edges (); + + /* Some EH regions might become dead, see PR34608. If + pass_cleanup_cfg isn't the first pass to happen with the + new child, these dead EH edges might cause problems. + Clean them up now. 
*/ + if (flag_exceptions) + { + basic_block bb; + bool changed = false; + + FOR_EACH_BB_FN (bb, cfun) + changed |= gimple_purge_dead_eh_edges (bb); + if (changed) + cleanup_tree_cfg (); + } + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + pop_cfun (); + + if (dump_file && !gimple_in_ssa_p (cfun)) + { + omp_any_child_fn_dumped = true; + dump_function_header (dump_file, child_fn, dump_flags); + dump_function_to_file (child_fn, dump_file, dump_flags); + } + } + + /* Emit a library call to launch the offloading region, or do data + transfers. */ + tree t1, t2, t3, t4, device, cond, depend, c, clauses; + enum built_in_function start_ix; + location_t clause_loc; + unsigned int flags_i = 0; + bool oacc_kernels_p = false; + + switch (gimple_omp_target_kind (entry_stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + start_ix = BUILT_IN_GOMP_TARGET; + break; + case GF_OMP_TARGET_KIND_DATA: + start_ix = BUILT_IN_GOMP_TARGET_DATA; + break; + case GF_OMP_TARGET_KIND_UPDATE: + start_ix = BUILT_IN_GOMP_TARGET_UPDATE; + break; + case GF_OMP_TARGET_KIND_ENTER_DATA: + start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_EXIT_DATA: + start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; + flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_OACC_KERNELS: + oacc_kernels_p = true; + /* FALLTHROUGH */ + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + start_ix = BUILT_IN_GOACC_PARALLEL; + break; + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + start_ix = BUILT_IN_GOACC_DATA_START; + break; + case GF_OMP_TARGET_KIND_OACC_UPDATE: + start_ix = BUILT_IN_GOACC_UPDATE; + break; + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; + break; + case GF_OMP_TARGET_KIND_OACC_DECLARE: + start_ix = BUILT_IN_GOACC_DECLARE; + break; + default: + gcc_unreachable (); + } + + clauses = gimple_omp_target_clauses (entry_stmt); + + /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime + library choose) and there is no conditional. */ + cond = NULL_TREE; + device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); + + c = omp_find_clause (clauses, OMP_CLAUSE_IF); + if (c) + cond = OMP_CLAUSE_IF_EXPR (c); + + c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE); + if (c) + { + /* Even if we pass it to all library function calls, it is currently only + defined/used for the OpenMP target ones. */ + gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET + || start_ix == BUILT_IN_GOMP_TARGET_DATA + || start_ix == BUILT_IN_GOMP_TARGET_UPDATE + || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA); + + device = OMP_CLAUSE_DEVICE_ID (c); + clause_loc = OMP_CLAUSE_LOCATION (c); + } + else + clause_loc = gimple_location (entry_stmt); + + c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT); + if (c) + flags_i |= GOMP_TARGET_FLAG_NOWAIT; + + /* Ensure 'device' is of the correct type. */ + device = fold_convert_loc (clause_loc, integer_type_node, device); + + /* If we found the clause 'if (cond)', build + (cond ? device : GOMP_DEVICE_HOST_FALLBACK). 
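+ The selection is emitted as an explicit conditional in the CFG, so a false condition at run time sends execution to the host fallback device.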
*/ + if (cond) + { + cond = gimple_boolify (cond); + + basic_block cond_bb, then_bb, else_bb; + edge e; + tree tmp_var; + + tmp_var = create_tmp_var (TREE_TYPE (device)); + if (offloaded) + e = split_block_after_labels (new_bb); + else + { + gsi = gsi_last_bb (new_bb); + gsi_prev (&gsi); + e = split_block (new_bb, gsi_stmt (gsi)); + } + cond_bb = e->src; + new_bb = e->dest; + remove_edge (e); + + then_bb = create_empty_bb (cond_bb); + else_bb = create_empty_bb (then_bb); + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); + + stmt = gimple_build_cond_empty (cond); + gsi = gsi_last_bb (cond_bb); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (then_bb); + stmt = gimple_build_assign (tmp_var, device); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + gsi = gsi_start_bb (else_bb); + stmt = gimple_build_assign (tmp_var, + build_int_cst (integer_type_node, + GOMP_DEVICE_HOST_FALLBACK)); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); + add_bb_to_loop (then_bb, cond_bb->loop_father); + add_bb_to_loop (else_bb, cond_bb->loop_father); + make_edge (then_bb, new_bb, EDGE_FALLTHRU); + make_edge (else_bb, new_bb, EDGE_FALLTHRU); + + device = tmp_var; + gsi = gsi_last_bb (new_bb); + } + else + { + gsi = gsi_last_bb (new_bb); + device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, + true, GSI_SAME_STMT); + } + + t = gimple_omp_target_data_arg (entry_stmt); + if (t == NULL) + { + t1 = size_zero_node; + t2 = build_zero_cst (ptr_type_node); + t3 = t2; + t4 = t2; + } + else + { + t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); + t1 = size_binop (PLUS_EXPR, t1, size_int (1)); + t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); + t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); + t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); + } + + gimple *g; + bool tagging = false; + /* The maximum number used by any start_ix, without varargs. */ + auto_vec<tree, 11> args; + args.quick_push (device); + if (offloaded) + args.quick_push (build_fold_addr_expr (child_fn)); + args.quick_push (t1); + args.quick_push (t2); + args.quick_push (t3); + args.quick_push (t4); + switch (start_ix) + { + case BUILT_IN_GOACC_DATA_START: + case BUILT_IN_GOACC_DECLARE: + case BUILT_IN_GOMP_TARGET_DATA: + break; + case BUILT_IN_GOMP_TARGET: + case BUILT_IN_GOMP_TARGET_UPDATE: + case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: + args.quick_push (build_int_cst (unsigned_type_node, flags_i)); + c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); + if (c) + depend = OMP_CLAUSE_DECL (c); + else + depend = build_int_cst (ptr_type_node, 0); + args.quick_push (depend); + if (start_ix == BUILT_IN_GOMP_TARGET) + args.quick_push (get_target_arguments (&gsi, entry_stmt)); + break; + case BUILT_IN_GOACC_PARALLEL: + { + oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args); + tagging = true; + } + /* FALLTHRU */ + case BUILT_IN_GOACC_ENTER_EXIT_DATA: + case BUILT_IN_GOACC_UPDATE: + { + tree t_async = NULL_TREE; + + /* If present, use the value specified by the respective + clause, making sure that is of the correct type. */ + c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC); + if (c) + t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_ASYNC_EXPR (c)); + else if (!tagging) + /* Default values for t_async. 
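+ That is, without an async clause we request a synchronous launch (GOMP_ASYNC_SYNC).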
*/ + t_async = fold_convert_loc (gimple_location (entry_stmt), + integer_type_node, + build_int_cst (integer_type_node, + GOMP_ASYNC_SYNC)); + if (tagging && t_async) + { + unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; + + if (TREE_CODE (t_async) == INTEGER_CST) + { + /* See if we can pack the async arg into the tag's + operand. */ + i_async = TREE_INT_CST_LOW (t_async); + if (i_async < GOMP_LAUNCH_OP_MAX) + t_async = NULL_TREE; + else + i_async = GOMP_LAUNCH_OP_MAX; + } + args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, + i_async)); + } + if (t_async) + args.safe_push (t_async); + + /* Save the argument index, and ... */ + unsigned t_wait_idx = args.length (); + unsigned num_waits = 0; + c = omp_find_clause (clauses, OMP_CLAUSE_WAIT); + if (!tagging || c) + /* ... push a placeholder. */ + args.safe_push (integer_zero_node); + + for (; c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) + { + args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), + integer_type_node, + OMP_CLAUSE_WAIT_EXPR (c))); + num_waits++; + } + + if (!tagging || num_waits) + { + tree len; + + /* Now that we know the number, update the placeholder. */ + if (tagging) + len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits); + else + len = build_int_cst (integer_type_node, num_waits); + len = fold_convert_loc (gimple_location (entry_stmt), + unsigned_type_node, len); + args[t_wait_idx] = len; + } + } + break; + default: + gcc_unreachable (); + } + if (tagging) + /* Push terminal marker - zero. */ + args.safe_push (oacc_launch_pack (0, NULL_TREE, 0)); + + g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args); + gimple_set_location (g, gimple_location (entry_stmt)); + gsi_insert_before (&gsi, g, GSI_SAME_STMT); + if (!offloaded) + { + g = gsi_stmt (gsi); + gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); + gsi_remove (&gsi, true); + } + if (data_region && region->exit) + { + gsi = gsi_last_bb (region->exit); + g = gsi_stmt (gsi); + gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN); + gsi_remove (&gsi, true); + } +} + +/* Expand KFOR loop as a HSA gridified kernel, i.e. as a body only with + iteration variable derived from the thread number. INTRA_GROUP means this + is an expansion of a loop iterating over work-items within a separate + iteration over groups.
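+ The loop control is removed entirely; each work-item obtains its iteration-variable value roughly as N1 + threadid * STEP from the appropriate HSA builtin and executes the body exactly once.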
*/ + +static void +grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) +{ + gimple_stmt_iterator gsi; + gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); + gcc_checking_assert (gimple_omp_for_kind (for_stmt) + == GF_OMP_FOR_KIND_GRID_LOOP); + size_t collapse = gimple_omp_for_collapse (for_stmt); + struct omp_for_data_loop *loops + = XALLOCAVEC (struct omp_for_data_loop, + gimple_omp_for_collapse (for_stmt)); + struct omp_for_data fd; + + remove_edge (BRANCH_EDGE (kfor->entry)); + basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; + + gcc_assert (kfor->cont); + omp_extract_for_data (for_stmt, &fd, loops); + + gsi = gsi_start_bb (body_bb); + + for (size_t dim = 0; dim < collapse; dim++) + { + tree type, itype; + itype = type = TREE_TYPE (fd.loops[dim].v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + tree n1 = fd.loops[dim].n1; + tree step = fd.loops[dim].step; + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + tree threadid; + if (gimple_omp_for_grid_group_iter (for_stmt)) + { + gcc_checking_assert (!intra_group); + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKGROUPID), 1, + build_int_cstu (unsigned_type_node, dim)); + } + else if (intra_group) + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKITEMID), 1, + build_int_cstu (unsigned_type_node, dim)); + else + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_HSA_WORKITEMABSID), 1, + build_int_cstu (unsigned_type_node, dim)); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + tree startvar = fd.loops[dim].v; + tree t = fold_build2 (MULT_EXPR, itype, threadid, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (type, t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, true, GSI_SAME_STMT); + gassign *assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + } + /* Remove the omp for statement */ + gsi = gsi_last_bb (kfor->entry); + gsi_remove (&gsi, true); + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi = gsi_last_bb (kfor->cont); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); + gsi_remove (&gsi, true); + + /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ + gsi = gsi_last_bb (kfor->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + if (intra_group) + gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Fixup the much simpler CFG. */ + remove_edge (find_edge (kfor->cont, body_bb)); + + if (kfor->cont != body_bb) + set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); + set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); +} + +/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap + argument_decls. */ + +struct grid_arg_decl_map +{ + tree old_arg; + tree new_arg; +}; + +/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones + pertaining to kernel function. 
*/ + +static tree +grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; + tree t = *tp; + + if (t == adm->old_arg) + *tp = adm->new_arg; + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; +} + +/* If TARGET region contains a kernel body for loop, remove its region from the + TARGET and expand it in HSA gridified kernel fashion. */ + +static void +grid_expand_target_grid_body (struct omp_region *target) +{ + if (!hsa_gen_requested_p ()) + return; + + gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); + struct omp_region **pp; + + for (pp = &target->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_GRID_BODY) + break; + + struct omp_region *gpukernel = *pp; + + tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); + if (!gpukernel) + { + /* HSA cannot handle OACC stuff. */ + if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) + return; + gcc_checking_assert (orig_child_fndecl); + gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + cgraph_node *n = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (n); + return; + } + + gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry))); + *pp = gpukernel->next; + for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_FOR) + break; + + struct omp_region *kfor = *pp; + gcc_assert (kfor); + gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); + gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); + *pp = kfor->next; + if (kfor->inner) + { + if (gimple_omp_for_grid_group_iter (for_stmt)) + { + struct omp_region **next_pp; + for (pp = &kfor->inner; *pp; pp = next_pp) + { + next_pp = &(*pp)->next; + if ((*pp)->type != GIMPLE_OMP_FOR) + continue; + gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); + gcc_assert (gimple_omp_for_kind (inner) + == GF_OMP_FOR_KIND_GRID_LOOP); + grid_expand_omp_for_loop (*pp, true); + *pp = (*pp)->next; + next_pp = pp; + } + } + expand_omp (kfor->inner); + } + if (gpukernel->inner) + expand_omp (gpukernel->inner); + + tree kern_fndecl = copy_node (orig_child_fndecl); + DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); + SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); + tree tgtblock = gimple_block (tgt_stmt); + tree fniniblock = make_node (BLOCK); + BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock; + BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); + BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); + BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; + DECL_INITIAL (kern_fndecl) = fniniblock; + push_struct_function (kern_fndecl); + cfun->function_end_locus = gimple_location (tgt_stmt); + init_tree_ssa (cfun); + pop_cfun (); + + tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); + gcc_assert (!DECL_CHAIN (old_parm_decl)); + tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); + DECL_CONTEXT (new_parm_decl) = kern_fndecl; + DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; + gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); + DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); + DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; + 
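/* Fetch the struct function of the new kernel decl; the gridified body will be moved into it below. */ +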
struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); + kern_cfun->curr_properties = cfun->curr_properties; + + grid_expand_omp_for_loop (kfor, false); + + /* Remove the omp for statement */ + gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry); + gsi_remove (&gsi, true); + /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real + return. */ + gsi = gsi_last_bb (gpukernel->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gimple *ret_stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Statements in the first BB in the target construct have been produced by + target lowering and must be copied inside the GPUKERNEL, with the two + exceptions of the first OMP statement and the OMP_DATA assignment + statement. */ + gsi = gsi_start_bb (single_succ (gpukernel->entry)); + tree data_arg = gimple_omp_target_data_arg (tgt_stmt); + tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL; + for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); + !gsi_end_p (tsi); gsi_next (&tsi)) + { + gimple *stmt = gsi_stmt (tsi); + if (is_gimple_omp (stmt)) + break; + if (sender + && is_gimple_assign (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR + && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) + continue; + gimple *copy = gimple_copy (stmt); + gsi_insert_before (&gsi, copy, GSI_SAME_STMT); + gimple_set_block (copy, fniniblock); + } + + move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), + gpukernel->exit, inside_block); + + cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); + kcn->mark_force_output (); + cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (kcn, orig_child); + + cgraph_node::add_new_function (kern_fndecl, true); + push_cfun (kern_cfun); + cgraph_edge::rebuild_edges (); + + /* Re-map any mention of the PARM_DECL of the original function to the + PARM_DECL of the new one. + + TODO: It would be great if lowering produced references into the GPU + kernel decl straight away and we did not have to do this. */ + struct grid_arg_decl_map adm; + adm.old_arg = old_parm_decl; + adm.new_arg = new_parm_decl; + basic_block bb; + FOR_EACH_BB_FN (bb, kern_cfun) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = &adm; + walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); + } + } + pop_cfun (); + + return; +} + +/* Expand the parallel region tree rooted at REGION. Expansion + proceeds in depth-first order. Innermost regions are expanded + first. This way, parallel regions that require a new function to + be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any + internal dependencies in their body. */ + +static void +expand_omp (struct omp_region *region) +{ + omp_any_child_fn_dumped = false; + while (region) + { + location_t saved_location; + gimple *inner_stmt = NULL; + + /* First, determine whether this is a combined parallel+workshare + region. 
*/
+ if (region->type == GIMPLE_OMP_PARALLEL)
+ determine_parallel_type (region);
+ else if (region->type == GIMPLE_OMP_TARGET)
+ grid_expand_target_grid_body (region);
+
+ if (region->type == GIMPLE_OMP_FOR
+ && gimple_omp_for_combined_p (last_stmt (region->entry)))
+ inner_stmt = last_stmt (region->inner->entry);
+
+ if (region->inner)
+ expand_omp (region->inner);
+
+ saved_location = input_location;
+ if (gimple_has_location (last_stmt (region->entry)))
+ input_location = gimple_location (last_stmt (region->entry));
+
+ switch (region->type)
+ {
+ case GIMPLE_OMP_PARALLEL:
+ case GIMPLE_OMP_TASK:
+ expand_omp_taskreg (region);
+ break;
+
+ case GIMPLE_OMP_FOR:
+ expand_omp_for (region, inner_stmt);
+ break;
+
+ case GIMPLE_OMP_SECTIONS:
+ expand_omp_sections (region);
+ break;
+
+ case GIMPLE_OMP_SECTION:
+ /* Individual omp sections are handled together with their
+ parent GIMPLE_OMP_SECTIONS region. */
+ break;
+
+ case GIMPLE_OMP_SINGLE:
+ expand_omp_single (region);
+ break;
+
+ case GIMPLE_OMP_ORDERED:
+ {
+ gomp_ordered *ord_stmt
+ = as_a <gomp_ordered *> (last_stmt (region->entry));
+ if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
+ OMP_CLAUSE_DEPEND))
+ {
+ /* We'll expand these when expanding the corresponding
+ worksharing region with an ordered(n) clause. */
+ gcc_assert (region->outer
+ && region->outer->type == GIMPLE_OMP_FOR);
+ region->ord_stmt = ord_stmt;
+ break;
+ }
+ }
+ /* FALLTHRU */
+ case GIMPLE_OMP_MASTER:
+ case GIMPLE_OMP_TASKGROUP:
+ case GIMPLE_OMP_CRITICAL:
+ case GIMPLE_OMP_TEAMS:
+ expand_omp_synch (region);
+ break;
+
+ case GIMPLE_OMP_ATOMIC_LOAD:
+ expand_omp_atomic (region);
+ break;
+
+ case GIMPLE_OMP_TARGET:
+ expand_omp_target (region);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ input_location = saved_location;
+ region = region->next;
+ }
+ if (omp_any_child_fn_dumped)
+ {
+ if (dump_file)
+ dump_function_header (dump_file, current_function_decl, dump_flags);
+ omp_any_child_fn_dumped = false;
+ }
+}
+
+/* Helper for build_omp_regions. Scan the dominator tree starting at
+ block BB. PARENT is the region that contains BB. If SINGLE_TREE is
+ true, the function ends once a single tree is built (otherwise, the
+ whole forest of OMP constructs may be built). */
+
+static void
+build_omp_regions_1 (basic_block bb, struct omp_region *parent,
+ bool single_tree)
+{
+ gimple_stmt_iterator gsi;
+ gimple *stmt;
+ basic_block son;
+
+ gsi = gsi_last_bb (bb);
+ if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
+ {
+ struct omp_region *region;
+ enum gimple_code code;
+
+ stmt = gsi_stmt (gsi);
+ code = gimple_code (stmt);
+ if (code == GIMPLE_OMP_RETURN)
+ {
+ /* STMT is the return point out of region PARENT. Mark it
+ as the exit point and make PARENT the immediately
+ enclosing region. */
+ gcc_assert (parent);
+ region = parent;
+ region->exit = bb;
+ parent = parent->outer;
+ }
+ else if (code == GIMPLE_OMP_ATOMIC_STORE)
+ {
+ /* GIMPLE_OMP_ATOMIC_STORE is analogous to
+ GIMPLE_OMP_RETURN, but matches with
+ GIMPLE_OMP_ATOMIC_LOAD. */
+ gcc_assert (parent);
+ gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
+ region = parent;
+ region->exit = bb;
+ parent = parent->outer;
+ }
+ else if (code == GIMPLE_OMP_CONTINUE)
+ {
+ gcc_assert (parent);
+ parent->cont = bb;
+ }
+ else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
+ {
+ /* GIMPLE_OMP_SECTIONS_SWITCH is part of
+ GIMPLE_OMP_SECTIONS, and we do nothing for it. */
+ }
+ else
+ {
+ region = new_omp_region (bb, code, parent);
+ /* Otherwise... 
*/ + if (code == GIMPLE_OMP_TARGET) + { + switch (gimple_omp_target_kind (stmt)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + /* ..., other than for those stand-alone directives... */ + region = NULL; + break; + default: + gcc_unreachable (); + } + } + else if (code == GIMPLE_OMP_ORDERED + && omp_find_clause (gimple_omp_ordered_clauses + (as_a <gomp_ordered *> (stmt)), + OMP_CLAUSE_DEPEND)) + /* #pragma omp ordered depend is also just a stand-alone + directive. */ + region = NULL; + /* ..., this directive becomes the parent for a new region. */ + if (region) + parent = region; + } + } + + if (single_tree && !parent) + return; + + for (son = first_dom_son (CDI_DOMINATORS, bb); + son; + son = next_dom_son (CDI_DOMINATORS, son)) + build_omp_regions_1 (son, parent, single_tree); +} + +/* Builds the tree of OMP regions rooted at ROOT, storing it to + root_omp_region. */ + +static void +build_omp_regions_root (basic_block root) +{ + gcc_assert (root_omp_region == NULL); + build_omp_regions_1 (root, NULL, true); + gcc_assert (root_omp_region != NULL); +} + +/* Expands omp construct (and its subconstructs) starting in HEAD. */ + +void +omp_expand_local (basic_block head) +{ + build_omp_regions_root (head); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + expand_omp (root_omp_region); + + omp_free_regions (); +} + +/* Scan the CFG and build a tree of OMP regions. Return the root of + the OMP region tree. */ + +static void +build_omp_regions (void) +{ + gcc_assert (root_omp_region == NULL); + calculate_dominance_info (CDI_DOMINATORS); + build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); +} + +/* Main entry point for expanding OMP-GIMPLE into runtime calls. */ + +static unsigned int +execute_expand_omp (void) +{ + build_omp_regions (); + + if (!root_omp_region) + return 0; + + if (dump_file) + { + fprintf (dump_file, "\nOMP region tree\n\n"); + dump_omp_region (dump_file, root_omp_region, 0); + fprintf (dump_file, "\n"); + } + + remove_exit_barriers (root_omp_region); + + expand_omp (root_omp_region); + + if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) + verify_loop_structure (); + cleanup_tree_cfg (); + + omp_free_regions (); + + return 0; +} + +/* OMP expansion -- the default pass, run before creation of SSA form. 
*/ + +namespace { + +const pass_data pass_data_expand_omp = +{ + GIMPLE_PASS, /* type */ + "ompexp", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_gimple_any, /* properties_required */ + PROP_gimple_eomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_expand_omp : public gimple_opt_pass +{ +public: + pass_expand_omp (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_omp, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { + bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0 + || flag_openmp_simd != 0) + && !seen_error ()); + + /* This pass always runs, to provide PROP_gimple_eomp. + But often, there is nothing to do. */ + if (!gate) + return 0; + + return execute_expand_omp (); + } + +}; // class pass_expand_omp + +} // anon namespace + +gimple_opt_pass * +make_pass_expand_omp (gcc::context *ctxt) +{ + return new pass_expand_omp (ctxt); +} + +namespace { + +const pass_data pass_data_expand_omp_ssa = +{ + GIMPLE_PASS, /* type */ + "ompexpssa", /* name */ + OPTGROUP_OPENMP, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + PROP_gimple_eomp, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ +}; + +class pass_expand_omp_ssa : public gimple_opt_pass +{ +public: + pass_expand_omp_ssa (gcc::context *ctxt) + : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *fun) + { + return !(fun->curr_properties & PROP_gimple_eomp); + } + virtual unsigned int execute (function *) { return execute_expand_omp (); } + opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } + +}; // class pass_expand_omp_ssa + +} // anon namespace + +gimple_opt_pass * +make_pass_expand_omp_ssa (gcc::context *ctxt) +{ + return new pass_expand_omp_ssa (ctxt); +} + +/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant + GIMPLE_* codes. 
*/ + +bool +omp_make_gimple_edges (basic_block bb, struct omp_region **region, + int *region_idx) +{ + gimple *last = last_stmt (bb); + enum gimple_code code = gimple_code (last); + struct omp_region *cur_region = *region; + bool fallthru = false; + + switch (code) + { + case GIMPLE_OMP_PARALLEL: + case GIMPLE_OMP_TASK: + case GIMPLE_OMP_FOR: + case GIMPLE_OMP_SINGLE: + case GIMPLE_OMP_TEAMS: + case GIMPLE_OMP_MASTER: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_GRID_BODY: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + break; + + case GIMPLE_OMP_ORDERED: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + if (omp_find_clause (gimple_omp_ordered_clauses + (as_a <gomp_ordered *> (last)), + OMP_CLAUSE_DEPEND)) + cur_region = cur_region->outer; + break; + + case GIMPLE_OMP_TARGET: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + switch (gimple_omp_target_kind (last)) + { + case GF_OMP_TARGET_KIND_REGION: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + case GF_OMP_TARGET_KIND_OACC_KERNELS: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + break; + case GF_OMP_TARGET_KIND_UPDATE: + case GF_OMP_TARGET_KIND_ENTER_DATA: + case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_UPDATE: + case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: + case GF_OMP_TARGET_KIND_OACC_DECLARE: + cur_region = cur_region->outer; + break; + default: + gcc_unreachable (); + } + break; + + case GIMPLE_OMP_SECTIONS: + cur_region = new_omp_region (bb, code, cur_region); + fallthru = true; + break; + + case GIMPLE_OMP_SECTIONS_SWITCH: + fallthru = false; + break; + + case GIMPLE_OMP_ATOMIC_LOAD: + case GIMPLE_OMP_ATOMIC_STORE: + fallthru = true; + break; + + case GIMPLE_OMP_RETURN: + /* In the case of a GIMPLE_OMP_SECTION, the edge will go + somewhere other than the next block. This will be + created later. */ + cur_region->exit = bb; + if (cur_region->type == GIMPLE_OMP_TASK) + /* Add an edge corresponding to not scheduling the task + immediately. */ + make_edge (cur_region->entry, bb, EDGE_ABNORMAL); + fallthru = cur_region->type != GIMPLE_OMP_SECTION; + cur_region = cur_region->outer; + break; + + case GIMPLE_OMP_CONTINUE: + cur_region->cont = bb; + switch (cur_region->type) + { + case GIMPLE_OMP_FOR: + /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE + succs edges as abnormal to prevent splitting + them. */ + single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL; + /* Make the loopback edge. */ + make_edge (bb, single_succ (cur_region->entry), + EDGE_ABNORMAL); + + /* Create an edge from GIMPLE_OMP_FOR to exit, which + corresponds to the case that the body of the loop + is not executed at all. */ + make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL); + make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL); + fallthru = false; + break; + + case GIMPLE_OMP_SECTIONS: + /* Wire up the edges into and out of the nested sections. */ + { + basic_block switch_bb = single_succ (cur_region->entry); + + struct omp_region *i; + for (i = cur_region->inner; i ; i = i->next) + { + gcc_assert (i->type == GIMPLE_OMP_SECTION); + make_edge (switch_bb, i->entry, 0); + make_edge (i->exit, bb, EDGE_FALLTHRU); + } + + /* Make the loopback edge to the block with + GIMPLE_OMP_SECTIONS_SWITCH. */ + make_edge (bb, switch_bb, 0); + + /* Make the edge from the switch to exit. 
*/
+ make_edge (switch_bb, bb->next_bb, 0);
+ fallthru = false;
+ }
+ break;
+
+ case GIMPLE_OMP_TASK:
+ fallthru = true;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (*region != cur_region)
+ {
+ *region = cur_region;
+ if (cur_region)
+ *region_idx = cur_region->entry->index;
+ else
+ *region_idx = 0;
+ }
+
+ return fallthru;
+}
+
+#include "gt-omp-expand.h"