diff options
Diffstat (limited to 'gcc/omp-low.c')
-rw-r--r-- | gcc/omp-low.c | 12091 |
1 files changed, 101 insertions, 11990 deletions
diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 7bcaeee..4fb59eb40 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -25,26 +25,17 @@ along with GCC; see the file COPYING3. If not see #include "config.h" #include "system.h" #include "coretypes.h" -#include "memmodel.h" #include "backend.h" #include "target.h" -#include "rtl.h" #include "tree.h" #include "gimple.h" -#include "cfghooks.h" -#include "alloc-pool.h" #include "tree-pass.h" #include "ssa.h" -#include "expmed.h" -#include "optabs.h" -#include "emit-rtl.h" #include "cgraph.h" #include "pretty-print.h" #include "diagnostic-core.h" -#include "alias.h" #include "fold-const.h" #include "stor-layout.h" -#include "cfganal.h" #include "internal-fn.h" #include "gimple-fold.h" #include "gimplify.h" @@ -54,36 +45,19 @@ along with GCC; see the file COPYING3. If not see #include "tree-iterator.h" #include "tree-inline.h" #include "langhooks.h" -#include "tree-cfg.h" -#include "tree-into-ssa.h" -#include "flags.h" -#include "dojump.h" -#include "explow.h" -#include "calls.h" -#include "varasm.h" -#include "stmt.h" -#include "expr.h" #include "tree-dfa.h" #include "tree-ssa.h" -#include "except.h" #include "splay-tree.h" -#include "cfgloop.h" -#include "common/common-target.h" +#include "omp-general.h" #include "omp-low.h" +#include "omp-grid.h" #include "gimple-low.h" -#include "tree-cfgcleanup.h" #include "symbol-summary.h" -#include "ipa-prop.h" #include "tree-nested.h" -#include "tree-eh.h" -#include "cilk.h" #include "context.h" -#include "lto-section-names.h" #include "gomp-constants.h" #include "gimple-pretty-print.h" -#include "symbol-summary.h" #include "hsa.h" -#include "params.h" /* Lowering of OMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -96,52 +70,6 @@ along with GCC; see the file COPYING3. If not see scanned for regions which are then moved to a new function, to be invoked by the thread library, or offloaded. */ -/* OMP region information. Every parallel and workshare - directive is enclosed between two markers, the OMP_* directive - and a corresponding GIMPLE_OMP_RETURN statement. */ - -struct omp_region -{ - /* The enclosing region. */ - struct omp_region *outer; - - /* First child region. */ - struct omp_region *inner; - - /* Next peer region. */ - struct omp_region *next; - - /* Block containing the omp directive as its last stmt. */ - basic_block entry; - - /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */ - basic_block exit; - - /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */ - basic_block cont; - - /* If this is a combined parallel+workshare region, this is a list - of additional arguments needed by the combined parallel+workshare - library call. */ - vec<tree, va_gc> *ws_args; - - /* The code for the omp directive of this region. */ - enum gimple_code type; - - /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */ - enum omp_clause_schedule_kind sched_kind; - - /* Schedule modifiers. */ - unsigned char sched_modifiers; - - /* True if this is a combined parallel+workshare region. */ - bool is_combined_parallel; - - /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has - a depend clause. */ - gomp_ordered *ord_stmt; -}; - /* Context structure. Used to store information about each parallel directive in the code. */ @@ -196,89 +124,14 @@ struct omp_context bool cancellable; }; -/* A structure holding the elements of: - for (V = N1; V cond N2; V += STEP) [...] */ - -struct omp_for_data_loop -{ - tree v, n1, n2, step; - enum tree_code cond_code; -}; - -/* A structure describing the main elements of a parallel loop. */ - -struct omp_for_data -{ - struct omp_for_data_loop loop; - tree chunk_size; - gomp_for *for_stmt; - tree pre, iter_type; - int collapse; - int ordered; - bool have_nowait, have_ordered, simd_schedule; - unsigned char sched_modifiers; - enum omp_clause_schedule_kind sched_kind; - struct omp_for_data_loop *loops; -}; - -/* Describe the OpenACC looping structure of a function. The entire - function is held in a 'NULL' loop. */ - -struct oacc_loop -{ - oacc_loop *parent; /* Containing loop. */ - - oacc_loop *child; /* First inner loop. */ - - oacc_loop *sibling; /* Next loop within same parent. */ - - location_t loc; /* Location of the loop start. */ - - gcall *marker; /* Initial head marker. */ - - gcall *heads[GOMP_DIM_MAX]; /* Head marker functions. */ - gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions. */ - - tree routine; /* Pseudo-loop enclosing a routine. */ - - unsigned mask; /* Partitioning mask. */ - unsigned inner; /* Partitioning of inner loops. */ - unsigned flags; /* Partitioning flags. */ - unsigned ifns; /* Contained loop abstraction functions. */ - tree chunk_size; /* Chunk size. */ - gcall *head_end; /* Final marker of head sequence. */ -}; - -/* Flags for an OpenACC loop. */ - -enum oacc_loop_flags { - OLF_SEQ = 1u << 0, /* Explicitly sequential */ - OLF_AUTO = 1u << 1, /* Compiler chooses axes. */ - OLF_INDEPENDENT = 1u << 2, /* Iterations are known independent. */ - OLF_GANG_STATIC = 1u << 3, /* Gang partitioning is static (has op). */ - - /* Explicitly specified loop axes. */ - OLF_DIM_BASE = 4, - OLF_DIM_GANG = 1u << (OLF_DIM_BASE + GOMP_DIM_GANG), - OLF_DIM_WORKER = 1u << (OLF_DIM_BASE + GOMP_DIM_WORKER), - OLF_DIM_VECTOR = 1u << (OLF_DIM_BASE + GOMP_DIM_VECTOR), - - OLF_MAX = OLF_DIM_BASE + GOMP_DIM_MAX -}; - - static splay_tree all_contexts; static int taskreg_nesting_level; static int target_nesting_level; -static struct omp_region *root_omp_region; static bitmap task_shared_vars; static vec<omp_context *> taskreg_contexts; -static bool omp_any_child_fn_dumped; static void scan_omp (gimple_seq *, omp_context *); static tree scan_omp_1_op (tree *, int *, void *); -static gphi *find_phi_with_arg_on_edge (tree, edge); -static int omp_max_simt_vf (void); #define WALK_SUBSTMTS \ case GIMPLE_BIND: \ @@ -379,9 +232,6 @@ unshare_and_remap (tree x, tree from, tree to) return x; } -/* Holds offload tables with decls. */ -vec<tree, va_gc> *offload_funcs, *offload_vars; - /* Convenience function for calling scan_omp_1_op on tree operands. */ static inline tree @@ -400,18 +250,6 @@ static void lower_omp (gimple_seq *, omp_context *); static tree lookup_decl_in_outer_ctx (tree, omp_context *); static tree maybe_lookup_decl_in_outer_ctx (tree, omp_context *); -/* Find an OMP clause of type KIND within CLAUSES. */ - -tree -find_omp_clause (tree clauses, enum omp_clause_code kind) -{ - for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses)) - if (OMP_CLAUSE_CODE (clauses) == kind) - return clauses; - - return NULL_TREE; -} - /* Return true if CTX is for an omp parallel. */ static inline bool @@ -448,592 +286,6 @@ is_taskreg_ctx (omp_context *ctx) return is_parallel_ctx (ctx) || is_task_ctx (ctx); } - -/* Return true if REGION is a combined parallel+workshare region. */ - -static inline bool -is_combined_parallel (struct omp_region *region) -{ - return region->is_combined_parallel; -} - -/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or - GT_EXPR. */ - -static void -adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2) -{ - switch (*cond_code) - { - case LT_EXPR: - case GT_EXPR: - case NE_EXPR: - break; - case LE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (*n2))) - *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); - else - *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, - build_int_cst (TREE_TYPE (*n2), 1)); - *cond_code = LT_EXPR; - break; - case GE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (*n2))) - *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); - else - *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, - build_int_cst (TREE_TYPE (*n2), 1)); - *cond_code = GT_EXPR; - break; - default: - gcc_unreachable (); - } -} - -/* Return the looping step from INCR, extracted from the step of a gimple omp - for statement. */ - -static tree -get_omp_for_step_from_incr (location_t loc, tree incr) -{ - tree step; - switch (TREE_CODE (incr)) - { - case PLUS_EXPR: - step = TREE_OPERAND (incr, 1); - break; - case POINTER_PLUS_EXPR: - step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); - break; - case MINUS_EXPR: - step = TREE_OPERAND (incr, 1); - step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); - break; - default: - gcc_unreachable (); - } - return step; -} - -/* Extract the header elements of parallel loop FOR_STMT and store - them into *FD. */ - -static void -extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd, - struct omp_for_data_loop *loops) -{ - tree t, var, *collapse_iter, *collapse_count; - tree count = NULL_TREE, iter_type = long_integer_type_node; - struct omp_for_data_loop *loop; - int i; - struct omp_for_data_loop dummy_loop; - location_t loc = gimple_location (for_stmt); - bool simd = gimple_omp_for_kind (for_stmt) & GF_OMP_FOR_SIMD; - bool distribute = gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_DISTRIBUTE; - bool taskloop = gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_TASKLOOP; - tree iterv, countv; - - fd->for_stmt = for_stmt; - fd->pre = NULL; - if (gimple_omp_for_collapse (for_stmt) > 1) - fd->loops = loops; - else - fd->loops = &fd->loop; - - fd->have_nowait = distribute || simd; - fd->have_ordered = false; - fd->collapse = 1; - fd->ordered = 0; - fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; - fd->sched_modifiers = 0; - fd->chunk_size = NULL_TREE; - fd->simd_schedule = false; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_CILKFOR) - fd->sched_kind = OMP_CLAUSE_SCHEDULE_CILKFOR; - collapse_iter = NULL; - collapse_count = NULL; - - for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t)) - switch (OMP_CLAUSE_CODE (t)) - { - case OMP_CLAUSE_NOWAIT: - fd->have_nowait = true; - break; - case OMP_CLAUSE_ORDERED: - fd->have_ordered = true; - if (OMP_CLAUSE_ORDERED_EXPR (t)) - fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t)); - break; - case OMP_CLAUSE_SCHEDULE: - gcc_assert (!distribute && !taskloop); - fd->sched_kind - = (enum omp_clause_schedule_kind) - (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK); - fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t) - & ~OMP_CLAUSE_SCHEDULE_MASK); - fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t); - fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t); - break; - case OMP_CLAUSE_DIST_SCHEDULE: - gcc_assert (distribute); - fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t); - break; - case OMP_CLAUSE_COLLAPSE: - fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t)); - if (fd->collapse > 1) - { - collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t); - collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t); - } - break; - default: - break; - } - if (fd->ordered && fd->collapse == 1 && loops != NULL) - { - fd->loops = loops; - iterv = NULL_TREE; - countv = NULL_TREE; - collapse_iter = &iterv; - collapse_count = &countv; - } - - /* FIXME: for now map schedule(auto) to schedule(static). - There should be analysis to determine whether all iterations - are approximately the same amount of work (then schedule(static) - is best) or if it varies (then schedule(dynamic,N) is better). */ - if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO) - { - fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC; - gcc_assert (fd->chunk_size == NULL); - } - gcc_assert (fd->collapse == 1 || collapse_iter != NULL); - if (taskloop) - fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME; - if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME) - gcc_assert (fd->chunk_size == NULL); - else if (fd->chunk_size == NULL) - { - /* We only need to compute a default chunk size for ordered - static loops and dynamic loops. */ - if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC - || fd->have_ordered) - fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) - ? integer_zero_node : integer_one_node; - } - - int cnt = fd->ordered ? fd->ordered : fd->collapse; - for (i = 0; i < cnt; i++) - { - if (i == 0 && fd->collapse == 1 && (fd->ordered == 0 || loops == NULL)) - loop = &fd->loop; - else if (loops != NULL) - loop = loops + i; - else - loop = &dummy_loop; - - loop->v = gimple_omp_for_index (for_stmt, i); - gcc_assert (SSA_VAR_P (loop->v)); - gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE - || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE); - var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v; - loop->n1 = gimple_omp_for_initial (for_stmt, i); - - loop->cond_code = gimple_omp_for_cond (for_stmt, i); - loop->n2 = gimple_omp_for_final (for_stmt, i); - gcc_assert (loop->cond_code != NE_EXPR - || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD - || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR); - adjust_for_condition (loc, &loop->cond_code, &loop->n2); - - t = gimple_omp_for_incr (for_stmt, i); - gcc_assert (TREE_OPERAND (t, 0) == var); - loop->step = get_omp_for_step_from_incr (loc, t); - - if (simd - || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC - && !fd->have_ordered)) - { - if (fd->collapse == 1) - iter_type = TREE_TYPE (loop->v); - else if (i == 0 - || TYPE_PRECISION (iter_type) - < TYPE_PRECISION (TREE_TYPE (loop->v))) - iter_type - = build_nonstandard_integer_type - (TYPE_PRECISION (TREE_TYPE (loop->v)), 1); - } - else if (iter_type != long_long_unsigned_type_node) - { - if (POINTER_TYPE_P (TREE_TYPE (loop->v))) - iter_type = long_long_unsigned_type_node; - else if (TYPE_UNSIGNED (TREE_TYPE (loop->v)) - && TYPE_PRECISION (TREE_TYPE (loop->v)) - >= TYPE_PRECISION (iter_type)) - { - tree n; - - if (loop->cond_code == LT_EXPR) - n = fold_build2_loc (loc, - PLUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - else - n = loop->n1; - if (TREE_CODE (n) != INTEGER_CST - || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n)) - iter_type = long_long_unsigned_type_node; - } - else if (TYPE_PRECISION (TREE_TYPE (loop->v)) - > TYPE_PRECISION (iter_type)) - { - tree n1, n2; - - if (loop->cond_code == LT_EXPR) - { - n1 = loop->n1; - n2 = fold_build2_loc (loc, - PLUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - } - else - { - n1 = fold_build2_loc (loc, - MINUS_EXPR, TREE_TYPE (loop->v), - loop->n2, loop->step); - n2 = loop->n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1) - || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type))) - iter_type = long_long_unsigned_type_node; - } - } - - if (i >= fd->collapse) - continue; - - if (collapse_count && *collapse_count == NULL) - { - t = fold_binary (loop->cond_code, boolean_type_node, - fold_convert (TREE_TYPE (loop->v), loop->n1), - fold_convert (TREE_TYPE (loop->v), loop->n2)); - if (t && integer_zerop (t)) - count = build_zero_cst (long_long_unsigned_type_node); - else if ((i == 0 || count != NULL_TREE) - && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE - && TREE_CONSTANT (loop->n1) - && TREE_CONSTANT (loop->n2) - && TREE_CODE (loop->step) == INTEGER_CST) - { - tree itype = TREE_TYPE (loop->v); - - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2_loc (loc, - PLUS_EXPR, itype, - fold_convert_loc (loc, itype, loop->step), t); - t = fold_build2_loc (loc, PLUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n2)); - t = fold_build2_loc (loc, MINUS_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->n1)); - if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR) - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, - fold_build1_loc (loc, NEGATE_EXPR, itype, t), - fold_build1_loc (loc, NEGATE_EXPR, itype, - fold_convert_loc (loc, itype, - loop->step))); - else - t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t, - fold_convert_loc (loc, itype, loop->step)); - t = fold_convert_loc (loc, long_long_unsigned_type_node, t); - if (count != NULL_TREE) - count = fold_build2_loc (loc, - MULT_EXPR, long_long_unsigned_type_node, - count, t); - else - count = t; - if (TREE_CODE (count) != INTEGER_CST) - count = NULL_TREE; - } - else if (count && !integer_zerop (count)) - count = NULL_TREE; - } - } - - if (count - && !simd - && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC - || fd->have_ordered)) - { - if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node))) - iter_type = long_long_unsigned_type_node; - else - iter_type = long_integer_type_node; - } - else if (collapse_iter && *collapse_iter != NULL) - iter_type = TREE_TYPE (*collapse_iter); - fd->iter_type = iter_type; - if (collapse_iter && *collapse_iter == NULL) - *collapse_iter = create_tmp_var (iter_type, ".iter"); - if (collapse_count && *collapse_count == NULL) - { - if (count) - *collapse_count = fold_convert_loc (loc, iter_type, count); - else - *collapse_count = create_tmp_var (iter_type, ".count"); - } - - if (fd->collapse > 1 || (fd->ordered && loops)) - { - fd->loop.v = *collapse_iter; - fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0); - fd->loop.n2 = *collapse_count; - fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1); - fd->loop.cond_code = LT_EXPR; - } - else if (loops) - loops[0] = fd->loop; -} - - -/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB - is the immediate dominator of PAR_ENTRY_BB, return true if there - are no data dependencies that would prevent expanding the parallel - directive at PAR_ENTRY_BB as a combined parallel+workshare region. - - When expanding a combined parallel+workshare region, the call to - the child function may need additional arguments in the case of - GIMPLE_OMP_FOR regions. In some cases, these arguments are - computed out of variables passed in from the parent to the child - via 'struct .omp_data_s'. For instance: - - #pragma omp parallel for schedule (guided, i * 4) - for (j ...) - - Is lowered into: - - # BLOCK 2 (PAR_ENTRY_BB) - .omp_data_o.i = i; - #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) - - # BLOCK 3 (WS_ENTRY_BB) - .omp_data_i = &.omp_data_o; - D.1667 = .omp_data_i->i; - D.1598 = D.1667 * 4; - #pragma omp for schedule (guided, D.1598) - - When we outline the parallel region, the call to the child function - 'bar.omp_fn.0' will need the value D.1598 in its argument list, but - that value is computed *after* the call site. So, in principle we - cannot do the transformation. - - To see whether the code in WS_ENTRY_BB blocks the combined - parallel+workshare call, we collect all the variables used in the - GIMPLE_OMP_FOR header check whether they appear on the LHS of any - statement in WS_ENTRY_BB. If so, then we cannot emit the combined - call. - - FIXME. If we had the SSA form built at this point, we could merely - hoist the code in block 3 into block 2 and be done with it. But at - this point we don't have dataflow information and though we could - hack something up here, it is really not worth the aggravation. */ - -static bool -workshare_safe_to_combine_p (basic_block ws_entry_bb) -{ - struct omp_for_data fd; - gimple *ws_stmt = last_stmt (ws_entry_bb); - - if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) - return true; - - gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR); - - extract_omp_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL); - - if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST) - return false; - if (fd.iter_type != long_integer_type_node) - return false; - - /* FIXME. We give up too easily here. If any of these arguments - are not constants, they will likely involve variables that have - been mapped into fields of .omp_data_s for sharing with the child - function. With appropriate data flow, it would be possible to - see through this. */ - if (!is_gimple_min_invariant (fd.loop.n1) - || !is_gimple_min_invariant (fd.loop.n2) - || !is_gimple_min_invariant (fd.loop.step) - || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size))) - return false; - - return true; -} - - -static int omp_max_vf (void); - -/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier - presence (SIMD_SCHEDULE). */ - -static tree -omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) -{ - if (!simd_schedule) - return chunk_size; - - int vf = omp_max_vf (); - if (vf == 1) - return chunk_size; - - tree type = TREE_TYPE (chunk_size); - chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size, - build_int_cst (type, vf - 1)); - return fold_build2 (BIT_AND_EXPR, type, chunk_size, - build_int_cst (type, -vf)); -} - - -/* Collect additional arguments needed to emit a combined - parallel+workshare call. WS_STMT is the workshare directive being - expanded. */ - -static vec<tree, va_gc> * -get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) -{ - tree t; - location_t loc = gimple_location (ws_stmt); - vec<tree, va_gc> *ws_args; - - if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) - { - struct omp_for_data fd; - tree n1, n2; - - extract_omp_for_data (for_stmt, &fd, NULL); - n1 = fd.loop.n1; - n2 = fd.loop.n2; - - if (gimple_omp_for_combined_into_p (for_stmt)) - { - tree innerc - = find_omp_clause (gimple_omp_parallel_clauses (par_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - - vec_alloc (ws_args, 3 + (fd.chunk_size != 0)); - - t = fold_convert_loc (loc, long_integer_type_node, n1); - ws_args->quick_push (t); - - t = fold_convert_loc (loc, long_integer_type_node, n2); - ws_args->quick_push (t); - - t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step); - ws_args->quick_push (t); - - if (fd.chunk_size) - { - t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size); - t = omp_adjust_chunk_size (t, fd.simd_schedule); - ws_args->quick_push (t); - } - - return ws_args; - } - else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) - { - /* Number of sections is equal to the number of edges from the - GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to - the exit of the sections region. */ - basic_block bb = single_succ (gimple_bb (ws_stmt)); - t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1); - vec_alloc (ws_args, 1); - ws_args->quick_push (t); - return ws_args; - } - - gcc_unreachable (); -} - - -/* Discover whether REGION is a combined parallel+workshare region. */ - -static void -determine_parallel_type (struct omp_region *region) -{ - basic_block par_entry_bb, par_exit_bb; - basic_block ws_entry_bb, ws_exit_bb; - - if (region == NULL || region->inner == NULL - || region->exit == NULL || region->inner->exit == NULL - || region->inner->cont == NULL) - return; - - /* We only support parallel+for and parallel+sections. */ - if (region->type != GIMPLE_OMP_PARALLEL - || (region->inner->type != GIMPLE_OMP_FOR - && region->inner->type != GIMPLE_OMP_SECTIONS)) - return; - - /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and - WS_EXIT_BB -> PAR_EXIT_BB. */ - par_entry_bb = region->entry; - par_exit_bb = region->exit; - ws_entry_bb = region->inner->entry; - ws_exit_bb = region->inner->exit; - - if (single_succ (par_entry_bb) == ws_entry_bb - && single_succ (ws_exit_bb) == par_exit_bb - && workshare_safe_to_combine_p (ws_entry_bb) - && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) - || (last_and_only_stmt (ws_entry_bb) - && last_and_only_stmt (par_exit_bb)))) - { - gimple *par_stmt = last_stmt (par_entry_bb); - gimple *ws_stmt = last_stmt (ws_entry_bb); - - if (region->inner->type == GIMPLE_OMP_FOR) - { - /* If this is a combined parallel loop, we need to determine - whether or not to use the combined library calls. There - are two cases where we do not apply the transformation: - static loops and any kind of ordered loop. In the first - case, we already open code the loop so there is no need - to do anything else. In the latter case, the combined - parallel loop call would still need extra synchronization - to implement ordered semantics, so there would not be any - gain in using the combined call. */ - tree clauses = gimple_omp_for_clauses (ws_stmt); - tree c = find_omp_clause (clauses, OMP_CLAUSE_SCHEDULE); - if (c == NULL - || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) - == OMP_CLAUSE_SCHEDULE_STATIC) - || find_omp_clause (clauses, OMP_CLAUSE_ORDERED)) - { - region->is_combined_parallel = false; - region->inner->is_combined_parallel = false; - return; - } - } - - region->is_combined_parallel = true; - region->inner->is_combined_parallel = true; - region->ws_args = get_ws_args_for (par_stmt, ws_stmt); - } -} - - /* Return true if EXPR is variable sized. */ static inline bool @@ -1042,25 +294,6 @@ is_variable_sized (const_tree expr) return !TREE_CONSTANT (TYPE_SIZE_UNIT (TREE_TYPE (expr))); } -/* Return true if DECL is a reference type. */ - -static inline bool -is_reference (tree decl) -{ - return lang_hooks.decls.omp_privatize_by_reference (decl); -} - -/* Return the type of a decl. If the decl is reference type, - return its base type. */ -static inline tree -get_base_type (tree decl) -{ - tree type = TREE_TYPE (decl); - if (is_reference (decl)) - type = TREE_TYPE (type); - return type; -} - /* Lookup variables. The "maybe" form allows for the variable form to not have been entered, otherwise we assert that the variable must have been entered. */ @@ -1359,7 +592,7 @@ build_outer_var_ref (tree var, omp_context *ctx, } x = lookup_decl (var, outer); } - else if (is_reference (var)) + else if (omp_is_reference (var)) /* This can happen with orphaned constructs. If var is reference, it is possible it is shared and as such valid. */ x = var; @@ -1382,7 +615,7 @@ build_outer_var_ref (tree var, omp_context *ctx, } } - if (is_reference (var)) + if (omp_is_reference (var)) x = build_simple_mem_ref (x); return x; @@ -1444,7 +677,7 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx, if (base_pointers_restrict) type = build_qualified_type (type, TYPE_QUAL_RESTRICT); } - else if ((mask & 3) == 1 && is_reference (var)) + else if ((mask & 3) == 1 && omp_is_reference (var)) type = TREE_TYPE (type); field = build_decl (DECL_SOURCE_LOCATION (var), @@ -1585,113 +818,6 @@ omp_copy_decl (tree var, copy_body_data *cb) return error_mark_node; } - -/* Debugging dumps for parallel regions. */ -void dump_omp_region (FILE *, struct omp_region *, int); -void debug_omp_region (struct omp_region *); -void debug_all_omp_regions (void); - -/* Dump the parallel region tree rooted at REGION. */ - -void -dump_omp_region (FILE *file, struct omp_region *region, int indent) -{ - fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, - gimple_code_name[region->type]); - - if (region->inner) - dump_omp_region (file, region->inner, indent + 4); - - if (region->cont) - { - fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", - region->cont->index); - } - - if (region->exit) - fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", - region->exit->index); - else - fprintf (file, "%*s[no exit marker]\n", indent, ""); - - if (region->next) - dump_omp_region (file, region->next, indent); -} - -DEBUG_FUNCTION void -debug_omp_region (struct omp_region *region) -{ - dump_omp_region (stderr, region, 0); -} - -DEBUG_FUNCTION void -debug_all_omp_regions (void) -{ - dump_omp_region (stderr, root_omp_region, 0); -} - - -/* Create a new parallel region starting at STMT inside region PARENT. */ - -static struct omp_region * -new_omp_region (basic_block bb, enum gimple_code type, - struct omp_region *parent) -{ - struct omp_region *region = XCNEW (struct omp_region); - - region->outer = parent; - region->entry = bb; - region->type = type; - - if (parent) - { - /* This is a nested region. Add it to the list of inner - regions in PARENT. */ - region->next = parent->inner; - parent->inner = region; - } - else - { - /* This is a toplevel region. Add it to the list of toplevel - regions in ROOT_OMP_REGION. */ - region->next = root_omp_region; - root_omp_region = region; - } - - return region; -} - -/* Release the memory associated with the region tree rooted at REGION. */ - -static void -free_omp_region_1 (struct omp_region *region) -{ - struct omp_region *i, *n; - - for (i = region->inner; i ; i = n) - { - n = i->next; - free_omp_region_1 (i); - } - - free (region); -} - -/* Release the memory for the entire omp region tree. */ - -void -free_omp_regions (void) -{ - struct omp_region *r, *n; - for (r = root_omp_region; r ; r = n) - { - n = r->next; - free_omp_region_1 (r); - } - root_omp_region = NULL; -} - - /* Create a new context, with OUTER_CTX being the surrounding context. */ static omp_context * @@ -1915,7 +1041,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, if ((! TREE_READONLY (decl) && !OMP_CLAUSE_SHARED_READONLY (c)) || TREE_ADDRESSABLE (decl) || by_ref - || is_reference (decl)) + || omp_is_reference (decl)) { by_ref = use_pointer_for_field (decl, ctx); install_var_field (decl, by_ref, 3, ctx); @@ -1965,7 +1091,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, && is_gimple_omp_offloaded (ctx->stmt)) { if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) - install_var_field (decl, !is_reference (decl), 3, ctx); + install_var_field (decl, !omp_is_reference (decl), 3, ctx); else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE) install_var_field (decl, true, 3, ctx); else @@ -1984,7 +1110,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, by_ref = use_pointer_for_field (decl, NULL); if (is_task_ctx (ctx) - && (global || by_ref || is_reference (decl))) + && (global || by_ref || omp_is_reference (decl))) { install_var_field (decl, false, 1, ctx); if (!global) @@ -2461,7 +1587,7 @@ create_omp_child_function (omp_context *ctx, bool task_copy) tree cilk_for_count = (flag_cilkplus && gimple_code (ctx->stmt) == GIMPLE_OMP_PARALLEL) - ? find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + ? omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE__CILK_FOR_COUNT_) : NULL_TREE; tree cilk_var_type = NULL_TREE; @@ -2592,10 +1718,10 @@ create_omp_child_function (omp_context *ctx, bool task_copy) /* Callback for walk_gimple_seq. Check if combined parallel contains gimple_omp_for_combined_into_p OMP_FOR. */ -static tree -find_combined_for (gimple_stmt_iterator *gsi_p, - bool *handled_ops_p, - struct walk_stmt_info *wi) +tree +omp_find_combined_for (gimple_stmt_iterator *gsi_p, + bool *handled_ops_p, + struct walk_stmt_info *wi) { gimple *stmt = gsi_stmt (*gsi_p); @@ -2630,12 +1756,12 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, memset (&wi, 0, sizeof (wi)); wi.val_only = true; wi.info = (void *) &msk; - walk_gimple_seq (gimple_omp_body (stmt), find_combined_for, NULL, &wi); + walk_gimple_seq (gimple_omp_body (stmt), omp_find_combined_for, NULL, &wi); if (wi.info != (void *) &msk) { gomp_for *for_stmt = as_a <gomp_for *> ((gimple *) wi.info); struct omp_for_data fd; - extract_omp_for_data (for_stmt, &fd, NULL); + omp_extract_for_data (for_stmt, &fd, NULL); /* We need two temporaries with fd.loop.v type (istart/iend) and then (fd.collapse - 1) temporaries with the same type for count2 ... countN-1 vars if not constant. */ @@ -2648,11 +1774,11 @@ add_taskreg_looptemp_clauses (enum gf_mask msk, gimple *stmt, /* If there are lastprivate clauses on the inner GIMPLE_OMP_FOR, add one more temporaries for the total number of iterations (product of count1 ... countN-1). */ - if (find_omp_clause (gimple_omp_for_clauses (for_stmt), + if (omp_find_clause (gimple_omp_for_clauses (for_stmt), OMP_CLAUSE_LASTPRIVATE)) count++; else if (msk == GF_OMP_FOR_KIND_FOR - && find_omp_clause (gimple_omp_parallel_clauses (stmt), + && omp_find_clause (gimple_omp_parallel_clauses (stmt), OMP_CLAUSE_LASTPRIVATE)) count++; } @@ -2681,7 +1807,7 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx) are copyin clauses. */ if (optimize > 0 && empty_body_p (gimple_omp_body (stmt)) - && find_omp_clause (gimple_omp_parallel_clauses (stmt), + && omp_find_clause (gimple_omp_parallel_clauses (stmt), OMP_CLAUSE_COPYIN) == NULL) { gsi_replace (gsi, gimple_build_nop (), false); @@ -2866,8 +1992,8 @@ finish_taskreg_scan (omp_context *ctx) clause first. There are filled by GOMP_taskloop and thus need to be in specific positions. */ tree c1 = gimple_omp_task_clauses (ctx->stmt); - c1 = find_omp_clause (c1, OMP_CLAUSE__LOOPTEMP_); - tree c2 = find_omp_clause (OMP_CLAUSE_CHAIN (c1), + c1 = omp_find_clause (c1, OMP_CLAUSE__LOOPTEMP_); + tree c2 = omp_find_clause (OMP_CLAUSE_CHAIN (c1), OMP_CLAUSE__LOOPTEMP_); tree f1 = lookup_field (OMP_CLAUSE_DECL (c1), ctx); tree f2 = lookup_field (OMP_CLAUSE_DECL (c2), ctx); @@ -3304,7 +2430,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && !(gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD || gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE)) { - if (get_oacc_fn_attrib (cfun->decl) != NULL) + if (oacc_get_fn_attrib (cfun->decl) != NULL) { error_at (gimple_location (stmt), "non-OpenACC construct inside of OpenACC routine"); @@ -3330,9 +2456,9 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (gimple_code (stmt) == GIMPLE_OMP_ORDERED) { c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt)); - if (find_omp_clause (c, OMP_CLAUSE_SIMD)) + if (omp_find_clause (c, OMP_CLAUSE_SIMD)) { - if (find_omp_clause (c, OMP_CLAUSE_THREADS) + if (omp_find_clause (c, OMP_CLAUSE_THREADS) && (ctx->outer == NULL || !gimple_omp_for_combined_into_p (ctx->stmt) || gimple_code (ctx->outer->stmt) != GIMPLE_OMP_FOR @@ -3414,7 +2540,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) default: break; } - else if (get_oacc_fn_attrib (current_function_decl)) + else if (oacc_get_fn_attrib (current_function_decl)) ok = true; if (!ok) { @@ -3467,12 +2593,12 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && !integer_zerop (gimple_call_arg (stmt, 1))) { ctx->cancellable = true; - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, "%<#pragma omp cancel for%> inside " "%<nowait%> for construct"); - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED)) warning_at (gimple_location (stmt), 0, "%<#pragma omp cancel for%> inside " @@ -3491,7 +2617,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (gimple_code (ctx->stmt) == GIMPLE_OMP_SECTIONS) { ctx->cancellable = true; - if (find_omp_clause (gimple_omp_sections_clauses + if (omp_find_clause (gimple_omp_sections_clauses (ctx->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, @@ -3504,7 +2630,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) && gimple_code (ctx->outer->stmt) == GIMPLE_OMP_SECTIONS); ctx->outer->cancellable = true; - if (find_omp_clause (gimple_omp_sections_clauses + if (omp_find_clause (gimple_omp_sections_clauses (ctx->outer->stmt), OMP_CLAUSE_NOWAIT)) warning_at (gimple_location (stmt), 0, @@ -3666,7 +2792,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) if (ctx == NULL || gimple_code (ctx->stmt) != GIMPLE_OMP_FOR || (oclause - = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED)) == NULL_TREE) { error_at (OMP_CLAUSE_LOCATION (c), @@ -3692,7 +2818,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) } } c = gimple_omp_ordered_clauses (as_a <gomp_ordered *> (stmt)); - if (find_omp_clause (c, OMP_CLAUSE_SIMD)) + if (omp_find_clause (c, OMP_CLAUSE_SIMD)) { /* ordered simd must be closely nested inside of simd region, and simd region must not encounter constructs other than @@ -3722,7 +2848,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) case GIMPLE_OMP_FOR: if (gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_TASKLOOP) goto ordered_in_taskloop; - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_ORDERED) == NULL) { error_at (gimple_location (stmt), @@ -3786,7 +2912,7 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) return false; } if (is_gimple_omp_offloaded (stmt) - && get_oacc_fn_attrib (cfun->decl) != NULL) + && oacc_get_fn_attrib (cfun->decl) != NULL) { error_at (gimple_location (stmt), "OpenACC region inside of OpenACC routine, nested " @@ -4098,19 +3224,6 @@ scan_omp (gimple_seq *body_p, omp_context *ctx) /* Re-gimplification and code generation routines. */ -/* Build a call to GOMP_barrier. */ - -static gimple * -build_omp_barrier (tree lhs) -{ - tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL - : BUILT_IN_GOMP_BARRIER); - gcall *g = gimple_build_call (fndecl, 0); - if (lhs) - gimple_call_set_lhs (g, lhs); - return g; -} - /* If a context was created for STMT when it was scanned, return it. */ static omp_context * @@ -4332,51 +3445,6 @@ omp_clause_aligned_alignment (tree clause) return build_int_cst (integer_type_node, al); } - -/* Return maximum SIMT width if offloading may target SIMT hardware. */ - -static int -omp_max_simt_vf (void) -{ - if (!optimize) - return 0; - if (ENABLE_OFFLOADING) - for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c; ) - { - if (!strncmp (c, "nvptx", strlen ("nvptx"))) - return 32; - else if ((c = strchr (c, ','))) - c++; - } - return 0; -} - -/* Return maximum possible vectorization factor for the target. */ - -static int -omp_max_vf (void) -{ - if (!optimize - || optimize_debug - || !flag_tree_loop_optimize - || (!flag_tree_loop_vectorize - && (global_options_set.x_flag_tree_loop_vectorize - || global_options_set.x_flag_tree_vectorize))) - return 1; - - int vf = 1; - int vs = targetm.vectorize.autovectorize_vector_sizes (); - if (vs) - vf = 1 << floor_log2 (vs); - else - { - machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode); - if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT) - vf = GET_MODE_NUNITS (vqimode); - } - return vf; -} - /* Helper function of lower_rec_input_clauses, used for #pragma omp simd privatization. */ @@ -4386,14 +3454,14 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx, int &max_vf, { if (max_vf == 0) { - if (find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + if (omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE__SIMT_)) max_vf = omp_max_simt_vf (); else max_vf = omp_max_vf (); if (max_vf > 1) { - tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_SAFELEN); if (c && (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) != INTEGER_CST @@ -4466,7 +3534,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, int pass; bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD); - bool maybe_simt = is_simd && find_omp_clause (clauses, OMP_CLAUSE__SIMT_); + bool maybe_simt = is_simd && omp_find_clause (clauses, OMP_CLAUSE__SIMT_); int max_vf = 0; tree lane = NULL_TREE, idx = NULL_TREE; tree simt_lane = NULL_TREE; @@ -4768,10 +3836,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, tree ref = build_outer_var_ref (var, ctx); /* For ref build_outer_var_ref already performs this. */ if (TREE_CODE (d) == INDIRECT_REF) - gcc_assert (is_reference (var)); + gcc_assert (omp_is_reference (var)); else if (TREE_CODE (d) == ADDR_EXPR) ref = build_fold_addr_expr (ref); - else if (is_reference (var)) + else if (omp_is_reference (var)) ref = build_fold_addr_expr (ref); ref = fold_convert_loc (clause_loc, ptype, ref); if (OMP_CLAUSE_REDUCTION_PLACEHOLDER (c) @@ -4935,7 +4003,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimplify_assign (ptr, x, ilist); } } - else if (is_reference (var)) + else if (omp_is_reference (var)) { /* For references that are being privatized for Fortran, allocate new backing storage for the new pointer @@ -5102,7 +4170,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, case OMP_CLAUSE_FIRSTPRIVATE: if (is_task_ctx (ctx)) { - if (is_reference (var) || is_variable_sized (var)) + if (omp_is_reference (var) || is_variable_sized (var)) goto do_dtor; else if (is_global_var (maybe_lookup_decl_in_outer_ctx (var, ctx)) @@ -5123,7 +4191,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, { tree t = OMP_CLAUSE_LINEAR_STEP (c); tree stept = TREE_TYPE (t); - tree ct = find_omp_clause (clauses, + tree ct = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); gcc_assert (ct); tree l = OMP_CLAUSE_DECL (ct); @@ -5230,14 +4298,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, gimple *tseq; x = build_outer_var_ref (var, ctx); - if (is_reference (var) + if (omp_is_reference (var) && !useless_type_conversion_p (TREE_TYPE (placeholder), TREE_TYPE (x))) x = build_fold_addr_expr_loc (clause_loc, x); SET_DECL_VALUE_EXPR (placeholder, x); DECL_HAS_VALUE_EXPR_P (placeholder) = 1; tree new_vard = new_var; - if (is_reference (var)) + if (omp_is_reference (var)) { gcc_assert (TREE_CODE (new_var) == MEM_REF); new_vard = TREE_OPERAND (new_var, 0); @@ -5295,7 +4363,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, for it because it is undesirable if SIMD arrays are used. But if they aren't used, we need to emit the deferred initialization now. */ - else if (is_reference (var) && is_simd) + else if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); x = lang_hooks.decls.omp_clause_default_ctor (c, unshare_expr (new_var), @@ -5331,7 +4399,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, code = PLUS_EXPR; tree new_vard = new_var; - if (is_simd && is_reference (var)) + if (is_simd && omp_is_reference (var)) { gcc_assert (TREE_CODE (new_var) == MEM_REF); new_vard = TREE_OPERAND (new_var, 0); @@ -5368,7 +4436,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, } else { - if (is_reference (var) && is_simd) + if (omp_is_reference (var) && is_simd) handle_simd_reference (clause_loc, new_vard, ilist); gimplify_assign (new_var, x, ilist); if (is_simd) @@ -5492,14 +4560,14 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist, #pragma omp distribute. */ if (gimple_code (ctx->stmt) != GIMPLE_OMP_FOR || gimple_omp_for_kind (ctx->stmt) == GF_OMP_FOR_KIND_FOR) - gimple_seq_add_stmt (ilist, build_omp_barrier (NULL_TREE)); + gimple_seq_add_stmt (ilist, omp_build_barrier (NULL_TREE)); } /* If max_vf is non-zero, then we can use only a vectorization factor up to the max_vf we chose. So stick it into the safelen clause. */ if (max_vf) { - tree c = find_omp_clause (gimple_omp_for_clauses (ctx->stmt), + tree c = omp_find_clause (gimple_omp_for_clauses (ctx->stmt), OMP_CLAUSE_SAFELEN); if (c == NULL_TREE || (TREE_CODE (OMP_CLAUSE_SAFELEN_EXPR (c)) == INTEGER_CST @@ -5546,7 +4614,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (ctx == NULL || !is_parallel_ctx (ctx)) return; - clauses = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + clauses = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE_LASTPRIVATE); if (clauses == NULL) return; @@ -5557,8 +4625,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD) { - maybe_simt = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMT_); - simduid = find_omp_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); + maybe_simt = omp_find_clause (orig_clauses, OMP_CLAUSE__SIMT_); + simduid = omp_find_clause (orig_clauses, OMP_CLAUSE__SIMDUID_); if (simduid) simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); } @@ -5705,7 +4773,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, } if (!x) x = build_outer_var_ref (var, ctx, OMP_CLAUSE_LASTPRIVATE); - if (is_reference (var)) + if (omp_is_reference (var)) new_var = build_simple_mem_ref_loc (clause_loc, new_var); x = lang_hooks.decls.omp_clause_assign_op (c, x, new_var); gimplify_and_add (x, stmt_list); @@ -5723,7 +4791,7 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list, if (ctx == NULL || !is_parallel_ctx (ctx)) break; - c = find_omp_clause (gimple_omp_parallel_clauses (ctx->stmt), + c = omp_find_clause (gimple_omp_parallel_clauses (ctx->stmt), OMP_CLAUSE_LASTPRIVATE); par_clauses = true; } @@ -5836,7 +4904,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, && maybe_lookup_field (orig, outer) && !is_private) { ref_to_res = build_receiver_ref (orig, false, outer); - if (is_reference (orig)) + if (omp_is_reference (orig)) ref_to_res = build_simple_mem_ref (ref_to_res); tree type = TREE_TYPE (var); @@ -5866,7 +4934,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner, if (!ref_to_res) ref_to_res = integer_zero_node; - if (is_reference (orig)) + if (omp_is_reference (orig)) { tree type = TREE_TYPE (var); const char *id = IDENTIFIER_POINTER (DECL_NAME (var)); @@ -6021,7 +5089,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) } } new_var = lookup_decl (var, ctx); - if (var == OMP_CLAUSE_DECL (c) && is_reference (var)) + if (var == OMP_CLAUSE_DECL (c) && omp_is_reference (var)) new_var = build_simple_mem_ref_loc (clause_loc, new_var); ref = build_outer_var_ref (var, ctx); code = OMP_CLAUSE_REDUCTION_CODE (c); @@ -6075,7 +5143,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) if (TREE_CODE (d) == INDIRECT_REF) { new_var = build_simple_mem_ref_loc (clause_loc, new_var); - gcc_assert (is_reference (var) && var == orig_var); + gcc_assert (omp_is_reference (var) && var == orig_var); } else if (TREE_CODE (d) == ADDR_EXPR) { @@ -6088,7 +5156,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) else { gcc_assert (orig_var == var); - if (is_reference (var)) + if (omp_is_reference (var)) ref = build_fold_addr_expr (ref); } if (DECL_P (v)) @@ -6162,7 +5230,7 @@ lower_reduction_clauses (tree clauses, gimple_seq *stmt_seqp, omp_context *ctx) { tree placeholder = OMP_CLAUSE_REDUCTION_PLACEHOLDER (c); - if (is_reference (var) + if (omp_is_reference (var) && !useless_type_conversion_p (TREE_TYPE (placeholder), TREE_TYPE (ref))) ref = build_fold_addr_expr_loc (clause_loc, ref); @@ -6230,7 +5298,7 @@ lower_copyprivate_clauses (tree clauses, gimple_seq *slist, gimple_seq *rlist, ref); ref = build_fold_indirect_ref_loc (clause_loc, ref); } - if (is_reference (var)) + if (omp_is_reference (var)) { ref = fold_convert_loc (clause_loc, TREE_TYPE (new_var), ref); ref = build_simple_mem_ref_loc (clause_loc, ref); @@ -6369,7 +5437,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, break; case OMP_CLAUSE_LASTPRIVATE: - if (by_ref || is_reference (val)) + if (by_ref || omp_is_reference (val)) { if (OMP_CLAUSE_LASTPRIVATE_FIRSTPRIVATE (c)) continue; @@ -6386,7 +5454,7 @@ lower_send_clauses (tree clauses, gimple_seq *ilist, gimple_seq *olist, case OMP_CLAUSE_REDUCTION: do_in = true; if (val == OMP_CLAUSE_DECL (c)) - do_out = !(by_ref || is_reference (val)); + do_out = !(by_ref || omp_is_reference (val)); else by_ref = TREE_CODE (TREE_TYPE (val)) == ARRAY_TYPE; break; @@ -6647,423 +5715,6 @@ lower_oacc_head_tail (location_t loc, tree clauses, lower_oacc_loop_marker (loc, ddvar, false, NULL_TREE, tail); } -/* A convenience function to build an empty GIMPLE_COND with just the - condition. */ - -static gcond * -gimple_build_cond_empty (tree cond) -{ - enum tree_code pred_code; - tree lhs, rhs; - - gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs); - return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); -} - -/* Return true if a parallel REGION is within a declare target function or - within a target region and is not a part of a gridified target. */ - -static bool -parallel_needs_hsa_kernel_p (struct omp_region *region) -{ - bool indirect = false; - for (region = region->outer; region; region = region->outer) - { - if (region->type == GIMPLE_OMP_PARALLEL) - indirect = true; - else if (region->type == GIMPLE_OMP_TARGET) - { - gomp_target *tgt_stmt - = as_a <gomp_target *> (last_stmt (region->entry)); - - if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - return indirect; - else - return true; - } - } - - if (lookup_attribute ("omp declare target", - DECL_ATTRIBUTES (current_function_decl))) - return true; - - return false; -} - -static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, - bool = false); - -/* Build the function calls to GOMP_parallel_start etc to actually - generate the parallel operation. REGION is the parallel region - being expanded. BB is the block where to insert the code. WS_ARGS - will be set if this is a call to a combined parallel+workshare - construct, it contains the list of additional arguments needed by - the workshare construct. */ - -static void -expand_parallel_call (struct omp_region *region, basic_block bb, - gomp_parallel *entry_stmt, - vec<tree, va_gc> *ws_args) -{ - tree t, t1, t2, val, cond, c, clauses, flags; - gimple_stmt_iterator gsi; - gimple *stmt; - enum built_in_function start_ix; - int start_ix2; - location_t clause_loc; - vec<tree, va_gc> *args; - - clauses = gimple_omp_parallel_clauses (entry_stmt); - - /* Determine what flavor of GOMP_parallel we will be - emitting. */ - start_ix = BUILT_IN_GOMP_PARALLEL; - if (is_combined_parallel (region)) - { - switch (region->inner->type) - { - case GIMPLE_OMP_FOR: - gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); - switch (region->inner->sched_kind) - { - case OMP_CLAUSE_SCHEDULE_RUNTIME: - start_ix2 = 3; - break; - case OMP_CLAUSE_SCHEDULE_DYNAMIC: - case OMP_CLAUSE_SCHEDULE_GUIDED: - if (region->inner->sched_modifiers - & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) - { - start_ix2 = 3 + region->inner->sched_kind; - break; - } - /* FALLTHRU */ - default: - start_ix2 = region->inner->sched_kind; - break; - } - start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; - start_ix = (enum built_in_function) start_ix2; - break; - case GIMPLE_OMP_SECTIONS: - start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; - break; - default: - gcc_unreachable (); - } - } - - /* By default, the value of NUM_THREADS is zero (selected at run time) - and there is no conditional. */ - cond = NULL_TREE; - val = build_int_cst (unsigned_type_node, 0); - flags = build_int_cst (unsigned_type_node, 0); - - c = find_omp_clause (clauses, OMP_CLAUSE_IF); - if (c) - cond = OMP_CLAUSE_IF_EXPR (c); - - c = find_omp_clause (clauses, OMP_CLAUSE_NUM_THREADS); - if (c) - { - val = OMP_CLAUSE_NUM_THREADS_EXPR (c); - clause_loc = OMP_CLAUSE_LOCATION (c); - } - else - clause_loc = gimple_location (entry_stmt); - - c = find_omp_clause (clauses, OMP_CLAUSE_PROC_BIND); - if (c) - flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); - - /* Ensure 'val' is of the correct type. */ - val = fold_convert_loc (clause_loc, unsigned_type_node, val); - - /* If we found the clause 'if (cond)', build either - (cond != 0) or (cond ? val : 1u). */ - if (cond) - { - cond = gimple_boolify (cond); - - if (integer_zerop (val)) - val = fold_build2_loc (clause_loc, - EQ_EXPR, unsigned_type_node, cond, - build_int_cst (TREE_TYPE (cond), 0)); - else - { - basic_block cond_bb, then_bb, else_bb; - edge e, e_then, e_else; - tree tmp_then, tmp_else, tmp_join, tmp_var; - - tmp_var = create_tmp_var (TREE_TYPE (val)); - if (gimple_in_ssa_p (cfun)) - { - tmp_then = make_ssa_name (tmp_var); - tmp_else = make_ssa_name (tmp_var); - tmp_join = make_ssa_name (tmp_var); - } - else - { - tmp_then = tmp_var; - tmp_else = tmp_var; - tmp_join = tmp_var; - } - - e = split_block_after_labels (bb); - cond_bb = e->src; - bb = e->dest; - remove_edge (e); - - then_bb = create_empty_bb (cond_bb); - else_bb = create_empty_bb (then_bb); - set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); - set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); - - stmt = gimple_build_cond_empty (cond); - gsi = gsi_start_bb (cond_bb); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (then_bb); - expand_omp_build_assign (&gsi, tmp_then, val, true); - - gsi = gsi_start_bb (else_bb); - expand_omp_build_assign (&gsi, tmp_else, - build_int_cst (unsigned_type_node, 1), - true); - - make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); - make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - add_bb_to_loop (then_bb, cond_bb->loop_father); - add_bb_to_loop (else_bb, cond_bb->loop_father); - e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); - e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); - - if (gimple_in_ssa_p (cfun)) - { - gphi *phi = create_phi_node (tmp_join, bb); - add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); - add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); - } - - val = tmp_join; - } - - gsi = gsi_start_bb (bb); - val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - } - - gsi = gsi_last_bb (bb); - t = gimple_omp_parallel_data_arg (entry_stmt); - if (t == NULL) - t1 = null_pointer_node; - else - t1 = build_fold_addr_expr (t); - tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); - t2 = build_fold_addr_expr (child_fndecl); - - vec_alloc (args, 4 + vec_safe_length (ws_args)); - args->quick_push (t2); - args->quick_push (t1); - args->quick_push (val); - if (ws_args) - args->splice (*ws_args); - args->quick_push (flags); - - t = build_call_expr_loc_vec (UNKNOWN_LOCATION, - builtin_decl_explicit (start_ix), args); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - if (hsa_gen_requested_p () - && parallel_needs_hsa_kernel_p (region)) - { - cgraph_node *child_cnode = cgraph_node::get (child_fndecl); - hsa_register_kernel (child_cnode); - } -} - -/* Insert a function call whose name is FUNC_NAME with the information from - ENTRY_STMT into the basic_block BB. */ - -static void -expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt, - vec <tree, va_gc> *ws_args) -{ - tree t, t1, t2; - gimple_stmt_iterator gsi; - vec <tree, va_gc> *args; - - gcc_assert (vec_safe_length (ws_args) == 2); - tree func_name = (*ws_args)[0]; - tree grain = (*ws_args)[1]; - - tree clauses = gimple_omp_parallel_clauses (entry_stmt); - tree count = find_omp_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_); - gcc_assert (count != NULL_TREE); - count = OMP_CLAUSE_OPERAND (count, 0); - - gsi = gsi_last_bb (bb); - t = gimple_omp_parallel_data_arg (entry_stmt); - if (t == NULL) - t1 = null_pointer_node; - else - t1 = build_fold_addr_expr (t); - t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); - - vec_alloc (args, 4); - args->quick_push (t2); - args->quick_push (t1); - args->quick_push (count); - args->quick_push (grain); - t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, - GSI_CONTINUE_LINKING); -} - -/* Build the function call to GOMP_task to actually - generate the task operation. BB is the block where to insert the code. */ - -static void -expand_task_call (struct omp_region *region, basic_block bb, - gomp_task *entry_stmt) -{ - tree t1, t2, t3; - gimple_stmt_iterator gsi; - location_t loc = gimple_location (entry_stmt); - - tree clauses = gimple_omp_task_clauses (entry_stmt); - - tree ifc = find_omp_clause (clauses, OMP_CLAUSE_IF); - tree untied = find_omp_clause (clauses, OMP_CLAUSE_UNTIED); - tree mergeable = find_omp_clause (clauses, OMP_CLAUSE_MERGEABLE); - tree depend = find_omp_clause (clauses, OMP_CLAUSE_DEPEND); - tree finalc = find_omp_clause (clauses, OMP_CLAUSE_FINAL); - tree priority = find_omp_clause (clauses, OMP_CLAUSE_PRIORITY); - - unsigned int iflags - = (untied ? GOMP_TASK_FLAG_UNTIED : 0) - | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) - | (depend ? GOMP_TASK_FLAG_DEPEND : 0); - - bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); - tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; - tree num_tasks = NULL_TREE; - bool ull = false; - if (taskloop_p) - { - gimple *g = last_stmt (region->outer->entry); - gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); - struct omp_for_data fd; - extract_omp_for_data (as_a <gomp_for *> (g), &fd, NULL); - startvar = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - endvar = find_omp_clause (OMP_CLAUSE_CHAIN (startvar), - OMP_CLAUSE__LOOPTEMP_); - startvar = OMP_CLAUSE_DECL (startvar); - endvar = OMP_CLAUSE_DECL (endvar); - step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); - if (fd.loop.cond_code == LT_EXPR) - iflags |= GOMP_TASK_FLAG_UP; - tree tclauses = gimple_omp_for_clauses (g); - num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_NUM_TASKS); - if (num_tasks) - num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); - else - { - num_tasks = find_omp_clause (tclauses, OMP_CLAUSE_GRAINSIZE); - if (num_tasks) - { - iflags |= GOMP_TASK_FLAG_GRAINSIZE; - num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); - } - else - num_tasks = integer_zero_node; - } - num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); - if (ifc == NULL_TREE) - iflags |= GOMP_TASK_FLAG_IF; - if (find_omp_clause (tclauses, OMP_CLAUSE_NOGROUP)) - iflags |= GOMP_TASK_FLAG_NOGROUP; - ull = fd.iter_type == long_long_unsigned_type_node; - } - else if (priority) - iflags |= GOMP_TASK_FLAG_PRIORITY; - - tree flags = build_int_cst (unsigned_type_node, iflags); - - tree cond = boolean_true_node; - if (ifc) - { - if (taskloop_p) - { - tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); - t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, - GOMP_TASK_FLAG_IF), - build_int_cst (unsigned_type_node, 0)); - flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, - flags, t); - } - else - cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); - } - - if (finalc) - { - tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); - t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, - build_int_cst (unsigned_type_node, - GOMP_TASK_FLAG_FINAL), - build_int_cst (unsigned_type_node, 0)); - flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); - } - if (depend) - depend = OMP_CLAUSE_DECL (depend); - else - depend = build_int_cst (ptr_type_node, 0); - if (priority) - priority = fold_convert (integer_type_node, - OMP_CLAUSE_PRIORITY_EXPR (priority)); - else - priority = integer_zero_node; - - gsi = gsi_last_bb (bb); - tree t = gimple_omp_task_data_arg (entry_stmt); - if (t == NULL) - t2 = null_pointer_node; - else - t2 = build_fold_addr_expr_loc (loc, t); - t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); - t = gimple_omp_task_copy_fn (entry_stmt); - if (t == NULL) - t3 = null_pointer_node; - else - t3 = build_fold_addr_expr_loc (loc, t); - - if (taskloop_p) - t = build_call_expr (ull - ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) - : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), - 11, t1, t2, t3, - gimple_omp_task_arg_size (entry_stmt), - gimple_omp_task_arg_align (entry_stmt), flags, - num_tasks, priority, startvar, endvar, step); - else - t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), - 9, t1, t2, t3, - gimple_omp_task_arg_size (entry_stmt), - gimple_omp_task_arg_align (entry_stmt), cond, flags, - depend, priority); - - force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); -} - - /* If exceptions are enabled, wrap the statements in BODY in a MUST_NOT_THROW catch handler and return it. This prevents programs from violating the structured block semantics with throws. */ @@ -7089,7403 +5740,6 @@ maybe_catch_exception (gimple_seq body) return gimple_seq_alloc_with_stmt (g); } -/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */ - -static tree -vec2chain (vec<tree, va_gc> *v) -{ - tree chain = NULL_TREE, t; - unsigned ix; - - FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t) - { - DECL_CHAIN (t) = chain; - chain = t; - } - - return chain; -} - - -/* Remove barriers in REGION->EXIT's block. Note that this is only - valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region - is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that - left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be - removed. */ - -static void -remove_exit_barrier (struct omp_region *region) -{ - gimple_stmt_iterator gsi; - basic_block exit_bb; - edge_iterator ei; - edge e; - gimple *stmt; - int any_addressable_vars = -1; - - exit_bb = region->exit; - - /* If the parallel region doesn't return, we don't have REGION->EXIT - block at all. */ - if (! exit_bb) - return; - - /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The - workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of - statements that can appear in between are extremely limited -- no - memory operations at all. Here, we allow nothing at all, so the - only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ - gsi = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gsi_prev (&gsi); - if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) - return; - - FOR_EACH_EDGE (e, ei, exit_bb->preds) - { - gsi = gsi_last_bb (e->src); - if (gsi_end_p (gsi)) - continue; - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) == GIMPLE_OMP_RETURN - && !gimple_omp_return_nowait_p (stmt)) - { - /* OpenMP 3.0 tasks unfortunately prevent this optimization - in many cases. If there could be tasks queued, the barrier - might be needed to let the tasks run before some local - variable of the parallel that the task uses as shared - runs out of scope. The task can be spawned either - from within current function (this would be easy to check) - or from some function it calls and gets passed an address - of such a variable. */ - if (any_addressable_vars < 0) - { - gomp_parallel *parallel_stmt - = as_a <gomp_parallel *> (last_stmt (region->entry)); - tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); - tree local_decls, block, decl; - unsigned ix; - - any_addressable_vars = 0; - FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) - if (TREE_ADDRESSABLE (decl)) - { - any_addressable_vars = 1; - break; - } - for (block = gimple_block (stmt); - !any_addressable_vars - && block - && TREE_CODE (block) == BLOCK; - block = BLOCK_SUPERCONTEXT (block)) - { - for (local_decls = BLOCK_VARS (block); - local_decls; - local_decls = DECL_CHAIN (local_decls)) - if (TREE_ADDRESSABLE (local_decls)) - { - any_addressable_vars = 1; - break; - } - if (block == gimple_block (parallel_stmt)) - break; - } - } - if (!any_addressable_vars) - gimple_omp_return_set_nowait (stmt); - } - } -} - -static void -remove_exit_barriers (struct omp_region *region) -{ - if (region->type == GIMPLE_OMP_PARALLEL) - remove_exit_barrier (region); - - if (region->inner) - { - region = region->inner; - remove_exit_barriers (region); - while (region->next) - { - region = region->next; - remove_exit_barriers (region); - } - } -} - -/* Optimize omp_get_thread_num () and omp_get_num_threads () - calls. These can't be declared as const functions, but - within one parallel body they are constant, so they can be - transformed there into __builtin_omp_get_{thread_num,num_threads} () - which are declared const. Similarly for task body, except - that in untied task omp_get_thread_num () can change at any task - scheduling point. */ - -static void -optimize_omp_library_calls (gimple *entry_stmt) -{ - basic_block bb; - gimple_stmt_iterator gsi; - tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); - tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); - bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK - && find_omp_clause (gimple_omp_task_clauses (entry_stmt), - OMP_CLAUSE_UNTIED) != NULL); - - FOR_EACH_BB_FN (bb, cfun) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *call = gsi_stmt (gsi); - tree decl; - - if (is_gimple_call (call) - && (decl = gimple_call_fndecl (call)) - && DECL_EXTERNAL (decl) - && TREE_PUBLIC (decl) - && DECL_INITIAL (decl) == NULL) - { - tree built_in; - - if (DECL_NAME (decl) == thr_num_id) - { - /* In #pragma omp task untied omp_get_thread_num () can change - during the execution of the task region. */ - if (untied_task) - continue; - built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - } - else if (DECL_NAME (decl) == num_thr_id) - built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - else - continue; - - if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in) - || gimple_call_num_args (call) != 0) - continue; - - if (flag_exceptions && !TREE_NOTHROW (decl)) - continue; - - if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE - || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)), - TREE_TYPE (TREE_TYPE (built_in)))) - continue; - - gimple_call_set_fndecl (call, built_in); - } - } -} - -/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be - regimplified. */ - -static tree -expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) -{ - tree t = *tp; - - /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ - if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t)) - return t; - - if (TREE_CODE (t) == ADDR_EXPR) - recompute_tree_invariant_for_addr_expr (t); - - *walk_subtrees = !TYPE_P (t) && !DECL_P (t); - return NULL_TREE; -} - -/* Prepend or append TO = FROM assignment before or after *GSI_P. */ - -static void -expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, - bool after) -{ - bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); - from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, - !after, after ? GSI_CONTINUE_LINKING - : GSI_SAME_STMT); - gimple *stmt = gimple_build_assign (to, from); - if (after) - gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING); - else - gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT); - if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) - || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL)) - { - gimple_stmt_iterator gsi = gsi_for_stmt (stmt); - gimple_regimplify_operands (stmt, &gsi); - } -} - -/* Expand the OpenMP parallel or task directive starting at REGION. */ - -static void -expand_omp_taskreg (struct omp_region *region) -{ - basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun; - tree child_fn, block, t; - gimple_stmt_iterator gsi; - gimple *entry_stmt, *stmt; - edge e; - vec<tree, va_gc> *ws_args; - - entry_stmt = last_stmt (region->entry); - child_fn = gimple_omp_taskreg_child_fn (entry_stmt); - child_cfun = DECL_STRUCT_FUNCTION (child_fn); - - entry_bb = region->entry; - if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) - exit_bb = region->cont; - else - exit_bb = region->exit; - - bool is_cilk_for - = (flag_cilkplus - && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL - && find_omp_clause (gimple_omp_parallel_clauses (entry_stmt), - OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE); - - if (is_cilk_for) - /* If it is a _Cilk_for statement, it is modelled *like* a parallel for, - and the inner statement contains the name of the built-in function - and grain. */ - ws_args = region->inner->ws_args; - else if (is_combined_parallel (region)) - ws_args = region->ws_args; - else - ws_args = NULL; - - if (child_cfun->cfg) - { - /* Due to inlining, it may happen that we have already outlined - the region, in which case all we need to do is make the - sub-graph unreachable and emit the parallel call. */ - edge entry_succ_e, exit_succ_e; - - entry_succ_e = single_succ_edge (entry_bb); - - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL - || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); - gsi_remove (&gsi, true); - - new_bb = entry_bb; - if (exit_bb) - { - exit_succ_e = single_succ_edge (exit_bb); - make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); - } - remove_edge_and_dominated_blocks (entry_succ_e); - } - else - { - unsigned srcidx, dstidx, num; - - /* If the parallel region needs data sent from the parent - function, then the very first statement (except possible - tree profile counter updates) of the parallel body - is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since - &.OMP_DATA_O is passed as an argument to the child function, - we need to replace it with the argument as seen by the child - function. - - In most cases, this will end up being the identity assignment - .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had - a function call that has been inlined, the original PARM_DECL - .OMP_DATA_I may have been converted into a different local - variable. In which case, we need to keep the assignment. */ - if (gimple_omp_taskreg_data_arg (entry_stmt)) - { - basic_block entry_succ_bb - = single_succ_p (entry_bb) ? single_succ (entry_bb) - : FALLTHRU_EDGE (entry_bb)->dest; - tree arg; - gimple *parcopy_stmt = NULL; - - for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) - { - gimple *stmt; - - gcc_assert (!gsi_end_p (gsi)); - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) != GIMPLE_ASSIGN) - continue; - - if (gimple_num_ops (stmt) == 2) - { - tree arg = gimple_assign_rhs1 (stmt); - - /* We're ignore the subcode because we're - effectively doing a STRIP_NOPS. */ - - if (TREE_CODE (arg) == ADDR_EXPR - && TREE_OPERAND (arg, 0) - == gimple_omp_taskreg_data_arg (entry_stmt)) - { - parcopy_stmt = stmt; - break; - } - } - } - - gcc_assert (parcopy_stmt != NULL); - arg = DECL_ARGUMENTS (child_fn); - - if (!gimple_in_ssa_p (cfun)) - { - if (gimple_assign_lhs (parcopy_stmt) == arg) - gsi_remove (&gsi, true); - else - { - /* ?? Is setting the subcode really necessary ?? */ - gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); - gimple_assign_set_rhs1 (parcopy_stmt, arg); - } - } - else - { - tree lhs = gimple_assign_lhs (parcopy_stmt); - gcc_assert (SSA_NAME_VAR (lhs) == arg); - /* We'd like to set the rhs to the default def in the child_fn, - but it's too early to create ssa names in the child_fn. - Instead, we set the rhs to the parm. In - move_sese_region_to_fn, we introduce a default def for the - parm, map the parm to it's default def, and once we encounter - this stmt, replace the parm with the default def. */ - gimple_assign_set_rhs1 (parcopy_stmt, arg); - update_stmt (parcopy_stmt); - } - } - - /* Declare local variables needed in CHILD_CFUN. */ - block = DECL_INITIAL (child_fn); - BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); - /* The gimplifier could record temporaries in parallel/task block - rather than in containing function's local_decls chain, - which would mean cgraph missed finalizing them. Do it now. */ - for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) - if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) - varpool_node::finalize_decl (t); - DECL_SAVED_TREE (child_fn) = NULL; - /* We'll create a CFG for child_fn, so no gimple body is needed. */ - gimple_set_body (child_fn, NULL); - TREE_USED (block) = 1; - - /* Reset DECL_CONTEXT on function arguments. */ - for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; - - /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK, - so that it can be moved to the child function. */ - gsi = gsi_last_bb (entry_bb); - stmt = gsi_stmt (gsi); - gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL - || gimple_code (stmt) == GIMPLE_OMP_TASK)); - e = split_block (entry_bb, stmt); - gsi_remove (&gsi, true); - entry_bb = e->dest; - edge e2 = NULL; - if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - else - { - e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); - gcc_assert (e2->dest == region->exit); - remove_edge (BRANCH_EDGE (entry_bb)); - set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); - gsi = gsi_last_bb (region->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gsi_remove (&gsi, true); - } - - /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ - if (exit_bb) - { - gsi = gsi_last_bb (exit_bb); - gcc_assert (!gsi_end_p (gsi) - && (gimple_code (gsi_stmt (gsi)) - == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); - stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - } - - /* Move the parallel region into CHILD_CFUN. */ - - if (gimple_in_ssa_p (cfun)) - { - init_tree_ssa (child_cfun); - init_ssa_operands (child_cfun); - child_cfun->gimple_df->in_ssa_p = true; - block = NULL_TREE; - } - else - block = gimple_block (entry_stmt); - - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - if (e2) - { - basic_block dest_bb = e2->dest; - if (!exit_bb) - make_edge (new_bb, dest_bb, EDGE_FALLTHRU); - remove_edge (e2); - set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); - } - /* When the OMP expansion process cannot guarantee an up-to-date - loop tree arrange for the child function to fixup loops. */ - if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; - - /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ - num = vec_safe_length (child_cfun->local_decls); - for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) - { - t = (*child_cfun->local_decls)[srcidx]; - if (DECL_CONTEXT (t) == cfun->decl) - continue; - if (srcidx != dstidx) - (*child_cfun->local_decls)[dstidx] = t; - dstidx++; - } - if (dstidx != num) - vec_safe_truncate (child_cfun->local_decls, dstidx); - - /* Inform the callgraph about the new function. */ - child_cfun->curr_properties = cfun->curr_properties; - child_cfun->has_simduid_loops |= cfun->has_simduid_loops; - child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; - cgraph_node *node = cgraph_node::get_create (child_fn); - node->parallelized_function = 1; - cgraph_node::add_new_function (child_fn, true); - - bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) - && !DECL_ASSEMBLER_NAME_SET_P (child_fn); - - /* Fix the callgraph edges for child_cfun. Those for cfun will be - fixed in a following pass. */ - push_cfun (child_cfun); - if (need_asm) - assign_assembler_name_if_neeeded (child_fn); - - if (optimize) - optimize_omp_library_calls (entry_stmt); - cgraph_edge::rebuild_edges (); - - /* Some EH regions might become dead, see PR34608. If - pass_cleanup_cfg isn't the first pass to happen with the - new child, these dead EH edges might cause problems. - Clean them up now. */ - if (flag_exceptions) - { - basic_block bb; - bool changed = false; - - FOR_EACH_BB_FN (bb, cfun) - changed |= gimple_purge_dead_eh_edges (bb); - if (changed) - cleanup_tree_cfg (); - } - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa); - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - pop_cfun (); - - if (dump_file && !gimple_in_ssa_p (cfun)) - { - omp_any_child_fn_dumped = true; - dump_function_header (dump_file, child_fn, dump_flags); - dump_function_to_file (child_fn, dump_file, dump_flags); - } - } - - /* Emit a library call to launch the children threads. */ - if (is_cilk_for) - expand_cilk_for_call (new_bb, - as_a <gomp_parallel *> (entry_stmt), ws_args); - else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) - expand_parallel_call (region, new_bb, - as_a <gomp_parallel *> (entry_stmt), ws_args); - else - expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_only_virtuals); -} - -/* Information about members of an OpenACC collapsed loop nest. */ - -struct oacc_collapse -{ - tree base; /* Base value. */ - tree iters; /* Number of steps. */ - tree step; /* step size. */ -}; - -/* Helper for expand_oacc_for. Determine collapsed loop information. - Fill in COUNTS array. Emit any initialization code before GSI. - Return the calculated outer loop bound of BOUND_TYPE. */ - -static tree -expand_oacc_collapse_init (const struct omp_for_data *fd, - gimple_stmt_iterator *gsi, - oacc_collapse *counts, tree bound_type) -{ - tree total = build_int_cst (bound_type, 1); - int ix; - - gcc_assert (integer_onep (fd->loop.step)); - gcc_assert (integer_zerop (fd->loop.n1)); - - for (ix = 0; ix != fd->collapse; ix++) - { - const omp_for_data_loop *loop = &fd->loops[ix]; - - tree iter_type = TREE_TYPE (loop->v); - tree diff_type = iter_type; - tree plus_type = iter_type; - - gcc_assert (loop->cond_code == fd->loop.cond_code); - - if (POINTER_TYPE_P (iter_type)) - plus_type = sizetype; - if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) - diff_type = signed_type_for (diff_type); - - tree b = loop->n1; - tree e = loop->n2; - tree s = loop->step; - bool up = loop->cond_code == LT_EXPR; - tree dir = build_int_cst (diff_type, up ? +1 : -1); - bool negating; - tree expr; - - b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, - true, GSI_SAME_STMT); - e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Convert the step, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); - if (negating) - s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); - s = fold_convert (diff_type, s); - if (negating) - s = fold_build1 (NEGATE_EXPR, diff_type, s); - s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Determine the range, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (iter_type); - expr = fold_build2 (MINUS_EXPR, plus_type, - fold_convert (plus_type, negating ? b : e), - fold_convert (plus_type, negating ? e : b)); - expr = fold_convert (diff_type, expr); - if (negating) - expr = fold_build1 (NEGATE_EXPR, diff_type, expr); - tree range = force_gimple_operand_gsi - (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); - - /* Determine number of iterations. */ - expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); - expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); - expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); - - tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, - true, GSI_SAME_STMT); - - counts[ix].base = b; - counts[ix].iters = iters; - counts[ix].step = s; - - total = fold_build2 (MULT_EXPR, bound_type, total, - fold_convert (bound_type, iters)); - } - - return total; -} - -/* Emit initializers for collapsed loop members. IVAR is the outer - loop iteration variable, from which collapsed loop iteration values - are calculated. COUNTS array has been initialized by - expand_oacc_collapse_inits. */ - -static void -expand_oacc_collapse_vars (const struct omp_for_data *fd, - gimple_stmt_iterator *gsi, - const oacc_collapse *counts, tree ivar) -{ - tree ivar_type = TREE_TYPE (ivar); - - /* The most rapidly changing iteration variable is the innermost - one. */ - for (int ix = fd->collapse; ix--;) - { - const omp_for_data_loop *loop = &fd->loops[ix]; - const oacc_collapse *collapse = &counts[ix]; - tree iter_type = TREE_TYPE (loop->v); - tree diff_type = TREE_TYPE (collapse->step); - tree plus_type = iter_type; - enum tree_code plus_code = PLUS_EXPR; - tree expr; - - if (POINTER_TYPE_P (iter_type)) - { - plus_code = POINTER_PLUS_EXPR; - plus_type = sizetype; - } - - expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar, - fold_convert (ivar_type, collapse->iters)); - expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), - collapse->step); - expr = fold_build2 (plus_code, iter_type, collapse->base, - fold_convert (plus_type, expr)); - expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - gassign *ass = gimple_build_assign (loop->v, expr); - gsi_insert_before (gsi, ass, GSI_SAME_STMT); - - if (ix) - { - expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar, - fold_convert (ivar_type, collapse->iters)); - ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, - true, GSI_SAME_STMT); - } - } -} - - -/* Helper function for expand_omp_{for_*,simd}. If this is the outermost - of the combined collapse > 1 loop constructs, generate code like: - if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: - count = 0; - and set ZERO_ITER_BB to that bb. If this isn't the outermost - of the combined loop constructs, just initialize COUNTS array - from the _looptemp_ clauses. */ - -/* NOTE: It *could* be better to moosh all of the BBs together, - creating one larger BB with all the computation and the unexpected - jump at the end. I.e. - - bool zero3, zero2, zero1, zero; - - zero3 = N32 c3 N31; - count3 = (N32 - N31) /[cl] STEP3; - zero2 = N22 c2 N21; - count2 = (N22 - N21) /[cl] STEP2; - zero1 = N12 c1 N11; - count1 = (N12 - N11) /[cl] STEP1; - zero = zero3 || zero2 || zero1; - count = count1 * count2 * count3; - if (__builtin_expect(zero, false)) goto zero_iter_bb; - - After all, we expect the zero=false, and thus we expect to have to - evaluate all of the comparison expressions, so short-circuiting - oughtn't be a win. Since the condition isn't protecting a - denominator, we're not concerned about divide-by-zero, so we can - fully evaluate count even if a numerator turned out to be wrong. - - It seems like putting this all together would create much better - scheduling opportunities, and less pressure on the chip's branch - predictor. */ - -static void -expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, - basic_block &entry_bb, tree *counts, - basic_block &zero_iter1_bb, int &first_zero_iter1, - basic_block &zero_iter2_bb, int &first_zero_iter2, - basic_block &l2_dom_bb) -{ - tree t, type = TREE_TYPE (fd->loop.v); - edge e, ne; - int i; - - /* Collapsed loops need work for expansion into SSA form. */ - gcc_assert (!gimple_in_ssa_p (cfun)); - - if (gimple_omp_for_combined_into_p (fd->for_stmt) - && TREE_CODE (fd->loop.n2) != INTEGER_CST) - { - gcc_assert (fd->ordered == 0); - /* First two _looptemp_ clauses are for istart/iend, counts[0] - isn't supposed to be handled, as the inner loop doesn't - use it. */ - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - for (i = 0; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - if (i) - counts[i] = OMP_CLAUSE_DECL (innerc); - else - counts[0] = NULL_TREE; - } - return; - } - - for (i = fd->collapse; i < fd->ordered; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - counts[i] = NULL_TREE; - t = fold_binary (fd->loops[i].cond_code, boolean_type_node, - fold_convert (itype, fd->loops[i].n1), - fold_convert (itype, fd->loops[i].n2)); - if (t && integer_zerop (t)) - { - for (i = fd->collapse; i < fd->ordered; i++) - counts[i] = build_int_cst (type, 0); - break; - } - } - for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - - if (i >= fd->collapse && counts[i]) - continue; - if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) - && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, - fold_convert (itype, fd->loops[i].n1), - fold_convert (itype, fd->loops[i].n2))) - == NULL_TREE || !integer_onep (t))) - { - gcond *cond_stmt; - tree n1, n2; - n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); - n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); - n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - *gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, gsi); - } - e = split_block (entry_bb, cond_stmt); - basic_block &zero_iter_bb - = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; - int &first_zero_iter - = i < fd->collapse ? first_zero_iter1 : first_zero_iter2; - if (zero_iter_bb == NULL) - { - gassign *assign_stmt; - first_zero_iter = i; - zero_iter_bb = create_empty_bb (entry_bb); - add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); - *gsi = gsi_after_labels (zero_iter_bb); - if (i < fd->collapse) - assign_stmt = gimple_build_assign (fd->loop.n2, - build_zero_cst (type)); - else - { - counts[i] = create_tmp_reg (type, ".count"); - assign_stmt - = gimple_build_assign (counts[i], build_zero_cst (type)); - } - gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); - set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, - entry_bb); - } - ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); - ne->probability = REG_BR_PROB_BASE / 2000 - 1; - e->flags = EDGE_TRUE_VALUE; - e->probability = REG_BR_PROB_BASE - ne->probability; - if (l2_dom_bb == NULL) - l2_dom_bb = entry_bb; - entry_bb = e->dest; - *gsi = gsi_last_bb (entry_bb); - } - - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR - ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, - fold_convert (itype, fd->loops[i].step), t); - t = fold_build2 (PLUS_EXPR, itype, t, - fold_convert (itype, fd->loops[i].n2)); - t = fold_build2 (MINUS_EXPR, itype, t, - fold_convert (itype, fd->loops[i].n1)); - /* ?? We could probably use CEIL_DIV_EXPR instead of - TRUNC_DIV_EXPR and adjusting by hand. Unless we can't - generate the same code in the end because generically we - don't know that the values involved must be negative for - GT?? */ - if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, - fold_convert (itype, - fd->loops[i].step))); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, - fold_convert (itype, fd->loops[i].step)); - t = fold_convert (type, t); - if (TREE_CODE (t) == INTEGER_CST) - counts[i] = t; - else - { - if (i < fd->collapse || i != first_zero_iter2) - counts[i] = create_tmp_reg (type, ".count"); - expand_omp_build_assign (gsi, counts[i], t); - } - if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) - { - if (i == 0) - t = counts[0]; - else - t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); - expand_omp_build_assign (gsi, fd->loop.n2, t); - } - } -} - - -/* Helper function for expand_omp_{for_*,simd}. Generate code like: - T = V; - V3 = N31 + (T % count3) * STEP3; - T = T / count3; - V2 = N21 + (T % count2) * STEP2; - T = T / count2; - V1 = N11 + T * STEP1; - if this loop doesn't have an inner loop construct combined with it. - If it does have an inner loop construct combined with it and the - iteration count isn't known constant, store values from counts array - into its _looptemp_ temporaries instead. */ - -static void -expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, - tree *counts, gimple *inner_stmt, tree startvar) -{ - int i; - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - /* If fd->loop.n2 is constant, then no propagation of the counts - is needed, they are constant. */ - if (TREE_CODE (fd->loop.n2) == INTEGER_CST) - return; - - tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR - ? gimple_omp_taskreg_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - /* First two _looptemp_ clauses are for istart/iend, counts[0] - isn't supposed to be handled, as the inner loop doesn't - use it. */ - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - for (i = 0; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - if (i) - { - tree tem = OMP_CLAUSE_DECL (innerc); - tree t = fold_convert (TREE_TYPE (tem), counts[i]); - t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, - false, GSI_CONTINUE_LINKING); - gassign *stmt = gimple_build_assign (tem, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - } - } - return; - } - - tree type = TREE_TYPE (fd->loop.v); - tree tem = create_tmp_reg (type, ".tem"); - gassign *stmt = gimple_build_assign (tem, startvar); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - - for (i = fd->collapse - 1; i >= 0; i--) - { - tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; - itype = vtype; - if (POINTER_TYPE_P (vtype)) - itype = signed_type_for (vtype); - if (i != 0) - t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); - else - t = tem; - t = fold_convert (itype, t); - t = fold_build2 (MULT_EXPR, itype, t, - fold_convert (itype, fd->loops[i].step)); - if (POINTER_TYPE_P (vtype)) - t = fold_build_pointer_plus (fd->loops[i].n1, t); - else - t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); - t = force_gimple_operand_gsi (gsi, t, - DECL_P (fd->loops[i].v) - && TREE_ADDRESSABLE (fd->loops[i].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i].v, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - if (i != 0) - { - t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]); - t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, - false, GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (tem, t); - gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); - } - } -} - - -/* Helper function for expand_omp_for_*. Generate code like: - L10: - V3 += STEP3; - if (V3 cond3 N32) goto BODY_BB; else goto L11; - L11: - V3 = N31; - V2 += STEP2; - if (V2 cond2 N22) goto BODY_BB; else goto L12; - L12: - V2 = N21; - V1 += STEP1; - goto BODY_BB; */ - -static basic_block -extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb, - basic_block body_bb) -{ - basic_block last_bb, bb, collapse_bb = NULL; - int i; - gimple_stmt_iterator gsi; - edge e; - tree t; - gimple *stmt; - - last_bb = cont_bb; - for (i = fd->collapse - 1; i >= 0; i--) - { - tree vtype = TREE_TYPE (fd->loops[i].v); - - bb = create_empty_bb (last_bb); - add_bb_to_loop (bb, last_bb->loop_father); - gsi = gsi_start_bb (bb); - - if (i < fd->collapse - 1) - { - e = make_edge (last_bb, bb, EDGE_FALSE_VALUE); - e->probability = REG_BR_PROB_BASE / 8; - - t = fd->loops[i + 1].n1; - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (fd->loops[i + 1].v) - && TREE_ADDRESSABLE (fd->loops[i - + 1].v), - NULL_TREE, false, - GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i + 1].v, t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - } - else - collapse_bb = bb; - - set_immediate_dominator (CDI_DOMINATORS, bb, last_bb); - - if (POINTER_TYPE_P (vtype)) - t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step); - else - t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (fd->loops[i].v) - && TREE_ADDRESSABLE (fd->loops[i].v), - NULL_TREE, false, GSI_CONTINUE_LINKING); - stmt = gimple_build_assign (fd->loops[i].v, t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - if (i > 0) - { - t = fd->loops[i].n2; - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - tree v = fd->loops[i].v; - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t); - stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - e = make_edge (bb, body_bb, EDGE_TRUE_VALUE); - e->probability = REG_BR_PROB_BASE * 7 / 8; - } - else - make_edge (bb, body_bb, EDGE_FALLTHRU); - last_bb = bb; - } - - return collapse_bb; -} - - -/* Expand #pragma omp ordered depend(source). */ - -static void -expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd, - tree *counts, location_t loc) -{ - enum built_in_function source_ix - = fd->iter_type == long_integer_type_node - ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST; - gimple *g - = gimple_build_call (builtin_decl_explicit (source_ix), 1, - build_fold_addr_expr (counts[fd->ordered])); - gimple_set_location (g, loc); - gsi_insert_before (gsi, g, GSI_SAME_STMT); -} - -/* Expand a single depend from #pragma omp ordered depend(sink:...). */ - -static void -expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd, - tree *counts, tree c, location_t loc) -{ - auto_vec<tree, 10> args; - enum built_in_function sink_ix - = fd->iter_type == long_integer_type_node - ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT; - tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE; - int i; - gimple_stmt_iterator gsi2 = *gsi; - bool warned_step = false; - - for (i = 0; i < fd->ordered; i++) - { - tree step = NULL_TREE; - off = TREE_PURPOSE (deps); - if (TREE_CODE (off) == TRUNC_DIV_EXPR) - { - step = TREE_OPERAND (off, 1); - off = TREE_OPERAND (off, 0); - } - if (!integer_zerop (off)) - { - gcc_assert (fd->loops[i].cond_code == LT_EXPR - || fd->loops[i].cond_code == GT_EXPR); - bool forward = fd->loops[i].cond_code == LT_EXPR; - if (step) - { - /* Non-simple Fortran DO loops. If step is variable, - we don't know at compile even the direction, so can't - warn. */ - if (TREE_CODE (step) != INTEGER_CST) - break; - forward = tree_int_cst_sgn (step) != -1; - } - if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - warning_at (loc, 0, "%<depend(sink)%> clause waiting for " - "lexically later iteration"); - break; - } - deps = TREE_CHAIN (deps); - } - /* If all offsets corresponding to the collapsed loops are zero, - this depend clause can be ignored. FIXME: but there is still a - flush needed. We need to emit one __sync_synchronize () for it - though (perhaps conditionally)? Solve this together with the - conservative dependence folding optimization. - if (i >= fd->collapse) - return; */ - - deps = OMP_CLAUSE_DECL (c); - gsi_prev (&gsi2); - edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2)); - edge e2 = split_block_after_labels (e1->dest); - - gsi2 = gsi_after_labels (e1->dest); - *gsi = gsi_last_bb (e1->src); - for (i = 0; i < fd->ordered; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - tree step = NULL_TREE; - tree orig_off = NULL_TREE; - if (POINTER_TYPE_P (itype)) - itype = sizetype; - if (i) - deps = TREE_CHAIN (deps); - off = TREE_PURPOSE (deps); - if (TREE_CODE (off) == TRUNC_DIV_EXPR) - { - step = TREE_OPERAND (off, 1); - off = TREE_OPERAND (off, 0); - gcc_assert (fd->loops[i].cond_code == LT_EXPR - && integer_onep (fd->loops[i].step) - && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))); - } - tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step); - if (step) - { - off = fold_convert_loc (loc, itype, off); - orig_off = off; - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); - } - - if (integer_zerop (off)) - t = boolean_true_node; - else - { - tree a; - tree co = fold_convert_loc (loc, itype, off); - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) - { - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - co = fold_build1_loc (loc, NEGATE_EXPR, itype, co); - a = fold_build2_loc (loc, POINTER_PLUS_EXPR, - TREE_TYPE (fd->loops[i].v), fd->loops[i].v, - co); - } - else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, co); - else - a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, co); - if (step) - { - tree t1, t2; - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - else - t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - else - t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, - step, build_int_cst (TREE_TYPE (step), 0)); - if (TREE_CODE (step) != INTEGER_CST) - { - t1 = unshare_expr (t1); - t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t2 = unshare_expr (t2); - t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - } - t = fold_build3_loc (loc, COND_EXPR, boolean_type_node, - t, t2, t1); - } - else if (fd->loops[i].cond_code == LT_EXPR) - { - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - else - t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - } - else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a, - fd->loops[i].n2); - else - t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a, - fd->loops[i].n1); - } - if (cond) - cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t); - else - cond = t; - - off = fold_convert_loc (loc, itype, off); - - if (step - || (fd->loops[i].cond_code == LT_EXPR - ? !integer_onep (fd->loops[i].step) - : !integer_minus_onep (fd->loops[i].step))) - { - if (step == NULL_TREE - && TYPE_UNSIGNED (itype) - && fd->loops[i].cond_code == GT_EXPR) - t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off, - fold_build1_loc (loc, NEGATE_EXPR, itype, - s)); - else - t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, - orig_off ? orig_off : off, s); - t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t, - build_int_cst (itype, 0)); - if (integer_zerop (t) && !warned_step) - { - warning_at (loc, 0, "%<depend(sink)%> refers to iteration never " - "in the iteration space"); - warned_step = true; - } - cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, - cond, t); - } - - if (i <= fd->collapse - 1 && fd->collapse > 1) - t = fd->loop.v; - else if (counts[i]) - t = counts[i]; - else - { - t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, fd->loops[i].n1); - t = fold_convert_loc (loc, fd->iter_type, t); - } - if (step) - /* We have divided off by step already earlier. */; - else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, - fold_build1_loc (loc, NEGATE_EXPR, itype, - s)); - else - off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s); - if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps)) - off = fold_build1_loc (loc, NEGATE_EXPR, itype, off); - off = fold_convert_loc (loc, fd->iter_type, off); - if (i <= fd->collapse - 1 && fd->collapse > 1) - { - if (i) - off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff, - off); - if (i < fd->collapse - 1) - { - coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off, - counts[i]); - continue; - } - } - off = unshare_expr (off); - t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off); - t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, - true, GSI_SAME_STMT); - args.safe_push (t); - } - gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args); - gimple_set_location (g, loc); - gsi_insert_before (&gsi2, g, GSI_SAME_STMT); - - cond = unshare_expr (cond); - cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false, - GSI_CONTINUE_LINKING); - gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT); - edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE); - e3->probability = REG_BR_PROB_BASE / 8; - e1->probability = REG_BR_PROB_BASE - e3->probability; - e1->flags = EDGE_TRUE_VALUE; - set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src); - - *gsi = gsi_after_labels (e2->dest); -} - -/* Expand all #pragma omp ordered depend(source) and - #pragma omp ordered depend(sink:...) constructs in the current - #pragma omp for ordered(n) region. */ - -static void -expand_omp_ordered_source_sink (struct omp_region *region, - struct omp_for_data *fd, tree *counts, - basic_block cont_bb) -{ - struct omp_region *inner; - int i; - for (i = fd->collapse - 1; i < fd->ordered; i++) - if (i == fd->collapse - 1 && fd->collapse > 1) - counts[i] = NULL_TREE; - else if (i >= fd->collapse && !cont_bb) - counts[i] = build_zero_cst (fd->iter_type); - else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) - && integer_onep (fd->loops[i].step)) - counts[i] = NULL_TREE; - else - counts[i] = create_tmp_var (fd->iter_type, ".orditer"); - tree atype - = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1); - counts[fd->ordered] = create_tmp_var (atype, ".orditera"); - TREE_ADDRESSABLE (counts[fd->ordered]) = 1; - - for (inner = region->inner; inner; inner = inner->next) - if (inner->type == GIMPLE_OMP_ORDERED) - { - gomp_ordered *ord_stmt = inner->ord_stmt; - gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt); - location_t loc = gimple_location (ord_stmt); - tree c; - for (c = gimple_omp_ordered_clauses (ord_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE) - break; - if (c) - expand_omp_ordered_source (&gsi, fd, counts, loc); - for (c = gimple_omp_ordered_clauses (ord_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK) - expand_omp_ordered_sink (&gsi, fd, counts, c, loc); - gsi_remove (&gsi, true); - } -} - -/* Wrap the body into fd->ordered - fd->collapse loops that aren't - collapsed. */ - -static basic_block -expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts, - basic_block cont_bb, basic_block body_bb, - bool ordered_lastprivate) -{ - if (fd->ordered == fd->collapse) - return cont_bb; - - if (!cont_bb) - { - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - for (int i = fd->collapse; i < fd->ordered; i++) - { - tree type = TREE_TYPE (fd->loops[i].v); - tree n1 = fold_convert (type, fd->loops[i].n1); - expand_omp_build_assign (&gsi, fd->loops[i].v, n1); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); - } - return NULL; - } - - for (int i = fd->ordered - 1; i >= fd->collapse; i--) - { - tree t, type = TREE_TYPE (fd->loops[i].v); - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - expand_omp_build_assign (&gsi, fd->loops[i].v, - fold_convert (type, fd->loops[i].n1)); - if (counts[i]) - expand_omp_build_assign (&gsi, counts[i], - build_zero_cst (fd->iter_type)); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type)); - if (!gsi_end_p (gsi)) - gsi_prev (&gsi); - else - gsi = gsi_last_bb (body_bb); - edge e1 = split_block (body_bb, gsi_stmt (gsi)); - basic_block new_body = e1->dest; - if (body_bb == cont_bb) - cont_bb = new_body; - edge e2 = NULL; - basic_block new_header; - if (EDGE_COUNT (cont_bb->preds) > 0) - { - gsi = gsi_last_bb (cont_bb); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loops[i].v, - fold_convert (sizetype, - fd->loops[i].step)); - else - t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v, - fold_convert (type, fd->loops[i].step)); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - if (counts[i]) - { - t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i], - build_int_cst (fd->iter_type, 1)); - expand_omp_build_assign (&gsi, counts[i], t); - t = counts[i]; - } - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, fd->loops[i].n1); - t = fold_convert (fd->iter_type, t); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - } - aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_int (i - fd->collapse + 1), - NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, t); - gsi_prev (&gsi); - e2 = split_block (cont_bb, gsi_stmt (gsi)); - new_header = e2->dest; - } - else - new_header = cont_bb; - gsi = gsi_after_labels (new_header); - tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE, - true, GSI_SAME_STMT); - tree n2 - = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2), - true, NULL_TREE, true, GSI_SAME_STMT); - t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT); - edge e3 = split_block (new_header, gsi_stmt (gsi)); - cont_bb = e3->dest; - remove_edge (e1); - make_edge (body_bb, new_header, EDGE_FALLTHRU); - e3->flags = EDGE_FALSE_VALUE; - e3->probability = REG_BR_PROB_BASE / 8; - e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE); - e1->probability = REG_BR_PROB_BASE - e3->probability; - - set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb); - set_immediate_dominator (CDI_DOMINATORS, new_body, new_header); - - if (e2) - { - struct loop *loop = alloc_loop (); - loop->header = new_header; - loop->latch = e2->src; - add_loop (loop, body_bb->loop_father); - } - } - - /* If there are any lastprivate clauses and it is possible some loops - might have zero iterations, ensure all the decls are initialized, - otherwise we could crash evaluating C++ class iterators with lastprivate - clauses. */ - bool need_inits = false; - for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++) - if (need_inits) - { - tree type = TREE_TYPE (fd->loops[i].v); - gimple_stmt_iterator gsi = gsi_after_labels (body_bb); - expand_omp_build_assign (&gsi, fd->loops[i].v, - fold_convert (type, fd->loops[i].n1)); - } - else - { - tree type = TREE_TYPE (fd->loops[i].v); - tree this_cond = fold_build2 (fd->loops[i].cond_code, - boolean_type_node, - fold_convert (type, fd->loops[i].n1), - fold_convert (type, fd->loops[i].n2)); - if (!integer_onep (this_cond)) - need_inits = true; - } - - return cont_bb; -} - - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with any schedule. Given parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0); - if (more) goto L0; else goto L3; - L0: - V = istart0; - iend = iend0; - L1: - BODY; - V += STEP; - if (V cond iend) goto L1; else goto L2; - L2: - if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; - L3: - - If this is a combined omp parallel loop, instead of the call to - GOMP_loop_foo_start, we call GOMP_loop_foo_next. - If this is gimple_omp_for_combined_p loop, then instead of assigning - V and iend in L0 we assign the first two _looptemp_ clause decls of the - inner GIMPLE_OMP_FOR and V += STEP; and - if (V cond iend) goto L1; else goto L2; are removed. - - For collapsed loops, given parameters: - collapse(3) - for (V1 = N11; V1 cond1 N12; V1 += STEP1) - for (V2 = N21; V2 cond2 N22; V2 += STEP2) - for (V3 = N31; V3 cond3 N32; V3 += STEP3) - BODY; - - we generate pseudocode - - if (__builtin_expect (N32 cond3 N31, 0)) goto Z0; - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (__builtin_expect (N22 cond2 N21, 0)) goto Z0; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (__builtin_expect (N12 cond1 N11, 0)) goto Z0; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - goto Z1; - Z0: - count = 0; - Z1: - more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0); - if (more) goto L0; else goto L3; - L0: - V = istart0; - T = V; - V3 = N31 + (T % count3) * STEP3; - T = T / count3; - V2 = N21 + (T % count2) * STEP2; - T = T / count2; - V1 = N11 + T * STEP1; - iend = iend0; - L1: - BODY; - V += 1; - if (V < iend) goto L10; else goto L2; - L10: - V3 += STEP3; - if (V3 cond3 N32) goto L1; else goto L11; - L11: - V3 = N31; - V2 += STEP2; - if (V2 cond2 N22) goto L1; else goto L12; - L12: - V2 = N21; - V1 += STEP1; - goto L1; - L2: - if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3; - L3: - - */ - -static void -expand_omp_for_generic (struct omp_region *region, - struct omp_for_data *fd, - enum built_in_function start_fn, - enum built_in_function next_fn, - gimple *inner_stmt) -{ - tree type, istart0, iend0, iend; - tree t, vmain, vback, bias = NULL_TREE; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb; - basic_block l2_bb = NULL, l3_bb = NULL; - gimple_stmt_iterator gsi; - gassign *assign_stmt; - bool in_combined_parallel = is_combined_parallel (region); - bool broken_loop = region->cont == NULL; - edge e, ne; - tree *counts = NULL; - int i; - bool ordered_lastprivate = false; - - gcc_assert (!broken_loop || !in_combined_parallel); - gcc_assert (fd->iter_type == long_integer_type_node - || !in_combined_parallel); - - entry_bb = region->entry; - cont_bb = region->cont; - collapse_bb = NULL; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - l0_bb = split_edge (FALLTHRU_EDGE (entry_bb)); - l1_bb = single_succ (l0_bb); - if (!broken_loop) - { - l2_bb = create_empty_bb (cont_bb); - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb - || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest - == l1_bb)); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - else - l2_bb = NULL; - l3_bb = BRANCH_EDGE (entry_bb)->dest; - exit_bb = region->exit; - - gsi = gsi_last_bb (entry_bb); - - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - if (fd->ordered - && find_omp_clause (gimple_omp_for_clauses (gsi_stmt (gsi)), - OMP_CLAUSE_LASTPRIVATE)) - ordered_lastprivate = false; - if (fd->collapse > 1 || fd->ordered) - { - int first_zero_iter1 = -1, first_zero_iter2 = -1; - basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL; - - counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter1_bb, first_zero_iter1, - zero_iter2_bb, first_zero_iter2, l2_dom_bb); - - if (zero_iter1_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter1; - i < (fd->ordered ? fd->ordered : fd->collapse); i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - gsi_prev (&gsi); - e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator (CDI_DOMINATORS, - zero_iter1_bb)); - } - if (zero_iter2_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter2; i < fd->ordered; i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - if (zero_iter1_bb) - make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); - else - { - gsi_prev (&gsi); - e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator - (CDI_DOMINATORS, zero_iter2_bb)); - } - } - if (fd->collapse == 1) - { - counts[0] = fd->loop.n2; - fd->loop = fd->loops[0]; - } - } - - type = TREE_TYPE (fd->loop.v); - istart0 = create_tmp_var (fd->iter_type, ".istart0"); - iend0 = create_tmp_var (fd->iter_type, ".iend0"); - TREE_ADDRESSABLE (istart0) = 1; - TREE_ADDRESSABLE (iend0) = 1; - - /* See if we need to bias by LLONG_MIN. */ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type) - && fd->ordered == 0) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - gimple_stmt_iterator gsif = gsi; - gsi_prev (&gsif); - - tree arr = NULL_TREE; - if (in_combined_parallel) - { - gcc_assert (fd->ordered == 0); - /* In a combined parallel loop, emit a call to - GOMP_loop_foo_next. */ - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - } - else - { - tree t0, t1, t2, t3, t4; - /* If this is not a combined parallel loop, emit a call to - GOMP_loop_foo_start in ENTRY_BB. */ - t4 = build_fold_addr_expr (iend0); - t3 = build_fold_addr_expr (istart0); - if (fd->ordered) - { - t0 = build_int_cst (unsigned_type_node, - fd->ordered - fd->collapse + 1); - arr = create_tmp_var (build_array_type_nelts (fd->iter_type, - fd->ordered - - fd->collapse + 1), - ".omp_counts"); - DECL_NAMELESS (arr) = 1; - TREE_ADDRESSABLE (arr) = 1; - TREE_STATIC (arr) = 1; - vec<constructor_elt, va_gc> *v; - vec_alloc (v, fd->ordered - fd->collapse + 1); - int idx; - - for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++) - { - tree c; - if (idx == 0 && fd->collapse > 1) - c = fd->loop.n2; - else - c = counts[idx + fd->collapse - 1]; - tree purpose = size_int (idx); - CONSTRUCTOR_APPEND_ELT (v, purpose, c); - if (TREE_CODE (c) != INTEGER_CST) - TREE_STATIC (arr) = 0; - } - - DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v); - if (!TREE_STATIC (arr)) - force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR, - void_type_node, arr), - true, NULL_TREE, true, GSI_SAME_STMT); - t1 = build_fold_addr_expr (arr); - t2 = NULL_TREE; - } - else - { - t2 = fold_convert (fd->iter_type, fd->loop.step); - t1 = fd->loop.n2; - t0 = fd->loop.n1; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc - = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - t0 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - t1 = OMP_CLAUSE_DECL (innerc); - } - if (POINTER_TYPE_P (TREE_TYPE (t0)) - && TYPE_PRECISION (TREE_TYPE (t0)) - != TYPE_PRECISION (fd->iter_type)) - { - /* Avoid casting pointers to integer of a different size. */ - tree itype = signed_type_for (type); - t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); - t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); - } - else - { - t1 = fold_convert (fd->iter_type, t1); - t0 = fold_convert (fd->iter_type, t0); - } - if (bias) - { - t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); - t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); - } - } - if (fd->iter_type == long_integer_type_node || fd->ordered) - { - if (fd->chunk_size) - { - t = fold_convert (fd->iter_type, fd->chunk_size); - t = omp_adjust_chunk_size (t, fd->simd_schedule); - if (fd->ordered) - t = build_call_expr (builtin_decl_explicit (start_fn), - 5, t0, t1, t, t3, t4); - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 6, t0, t1, t2, t, t3, t4); - } - else if (fd->ordered) - t = build_call_expr (builtin_decl_explicit (start_fn), - 4, t0, t1, t3, t4); - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 5, t0, t1, t2, t3, t4); - } - else - { - tree t5; - tree c_bool_type; - tree bfn_decl; - - /* The GOMP_loop_ull_*start functions have additional boolean - argument, true for < loops and false for > loops. - In Fortran, the C bool type can be different from - boolean_type_node. */ - bfn_decl = builtin_decl_explicit (start_fn); - c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl)); - t5 = build_int_cst (c_bool_type, - fd->loop.cond_code == LT_EXPR ? 1 : 0); - if (fd->chunk_size) - { - tree bfn_decl = builtin_decl_explicit (start_fn); - t = fold_convert (fd->iter_type, fd->chunk_size); - t = omp_adjust_chunk_size (t, fd->simd_schedule); - t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4); - } - else - t = build_call_expr (builtin_decl_explicit (start_fn), - 6, t5, t0, t1, t2, t3, t4); - } - } - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - if (arr && !TREE_STATIC (arr)) - { - tree clobber = build_constructor (TREE_TYPE (arr), NULL); - TREE_THIS_VOLATILE (clobber) = 1; - gsi_insert_before (&gsi, gimple_build_assign (arr, clobber), - GSI_SAME_STMT); - } - gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - if (gsi_end_p (gsif)) - gsif = gsi_after_labels (gsi_bb (gsif)); - gsi_next (&gsif); - - /* Iteration setup for sequential loop goes in L0_BB. */ - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (inner_stmt) - == GF_OMP_FOR_KIND_SIMD); - tree innerc = find_omp_clause (gimple_omp_for_clauses (inner_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - } - - gsi = gsi_start_bb (l0_bb); - t = istart0; - if (fd->ordered && fd->collapse == 1) - t = fold_build2 (MULT_EXPR, fd->iter_type, t, - fold_convert (fd->iter_type, fd->loop.step)); - else if (bias) - t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); - if (fd->ordered && fd->collapse == 1) - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, fold_convert (sizetype, t)); - else - { - t = fold_convert (TREE_TYPE (startvar), t); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, t); - } - } - else - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); - t = fold_convert (TREE_TYPE (startvar), t); - } - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = iend0; - if (fd->ordered && fd->collapse == 1) - t = fold_build2 (MULT_EXPR, fd->iter_type, t, - fold_convert (fd->iter_type, fd->loop.step)); - else if (bias) - t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias); - if (fd->ordered && fd->collapse == 1) - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, fold_convert (sizetype, t)); - else - { - t = fold_convert (TREE_TYPE (startvar), t); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar), - fd->loop.n1, t); - } - } - else - { - if (POINTER_TYPE_P (TREE_TYPE (startvar))) - t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t); - t = fold_convert (TREE_TYPE (startvar), t); - } - iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, iend); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend))) - assign_stmt = gimple_build_assign (fd->loop.v, iend); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. */ - tree itercnt = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - dest = unshare_expr (t); - tree v = create_tmp_var (TREE_TYPE (t), NULL); - expand_omp_build_assign (&gsif, v, t); - if (itercnt == NULL_TREE) - { - itercnt = startvar; - tree n1 = fd->loop.n1; - if (POINTER_TYPE_P (TREE_TYPE (itercnt))) - { - itercnt - = fold_convert (signed_type_for (TREE_TYPE (itercnt)), - itercnt); - n1 = fold_convert (TREE_TYPE (itercnt), n1); - } - itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt), - itercnt, n1); - itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt), - itercnt, fd->loop.step); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - a = fold_build2 (MULT_EXPR, type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (fd->ordered) - { - /* Until now, counts array contained number of iterations or - variable containing it for ith loop. From now on, we need - those counts only for collapsed loops, and only for the 2nd - till the last collapsed one. Move those one element earlier, - we'll use counts[fd->collapse - 1] for the first source/sink - iteration counter and so on and counts[fd->ordered] - as the array holding the current counter values for - depend(source). */ - if (fd->collapse > 1) - memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0])); - if (broken_loop) - { - int i; - for (i = fd->collapse; i < fd->ordered; i++) - { - tree type = TREE_TYPE (fd->loops[i].v); - tree this_cond - = fold_build2 (fd->loops[i].cond_code, boolean_type_node, - fold_convert (type, fd->loops[i].n1), - fold_convert (type, fd->loops[i].n2)); - if (!integer_onep (this_cond)) - break; - } - if (i < fd->ordered) - { - cont_bb - = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); - add_bb_to_loop (cont_bb, l1_bb->loop_father); - gimple_stmt_iterator gsi = gsi_after_labels (cont_bb); - gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - make_edge (cont_bb, l3_bb, EDGE_FALLTHRU); - make_edge (cont_bb, l1_bb, 0); - l2_bb = create_empty_bb (cont_bb); - broken_loop = false; - } - } - expand_omp_ordered_source_sink (region, fd, counts, cont_bb); - cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb, - ordered_lastprivate); - if (counts[fd->collapse - 1]) - { - gcc_assert (fd->collapse == 1); - gsi = gsi_last_bb (l0_bb); - expand_omp_build_assign (&gsi, counts[fd->collapse - 1], - istart0, true); - gsi = gsi_last_bb (cont_bb); - t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], - build_int_cst (fd->iter_type, 1)); - expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_zero_node, NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); - t = counts[fd->collapse - 1]; - } - else if (fd->collapse > 1) - t = fd->loop.v; - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), - fd->loops[0].v, fd->loops[0].n1); - t = fold_convert (fd->iter_type, t); - } - gsi = gsi_last_bb (l0_bb); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_zero_node, NULL_TREE, NULL_TREE); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - expand_omp_build_assign (&gsi, aref, t, true); - } - - if (!broken_loop) - { - /* Code to control the increment and predicate for the sequential - loop goes in the CONT_BB. */ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, fd->loop.step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE) - { - if (fd->collapse > 1) - t = fd->loop.v; - else - { - t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v), - fd->loops[0].v, fd->loops[0].n1); - t = fold_convert (fd->iter_type, t); - } - tree aref = build4 (ARRAY_REF, fd->iter_type, - counts[fd->ordered], size_zero_node, - NULL_TREE, NULL_TREE); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - expand_omp_build_assign (&gsi, aref, t); - } - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback, - iend); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb); - - /* Emit code to get the next parallel iteration in L2_BB. */ - gsi = gsi_start_bb (l2_bb); - - t = build_call_expr (builtin_decl_explicit (next_fn), 2, - build_fold_addr_expr (istart0), - build_fold_addr_expr (iend0)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (TREE_TYPE (t) != boolean_type_node) - t = fold_build2 (NE_EXPR, boolean_type_node, - t, build_int_cst (TREE_TYPE (t), 0)); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); - } - - /* Add the loop cleanup function. */ - gsi = gsi_last_bb (exit_bb); - if (gimple_omp_return_nowait_p (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT); - else if (gimple_omp_return_lhs (gsi_stmt (gsi))) - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL); - else - t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END); - gcall *call_stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (gsi))) - gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi))); - gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT); - if (fd->ordered) - { - tree arr = counts[fd->ordered]; - tree clobber = build_constructor (TREE_TYPE (arr), NULL); - TREE_THIS_VOLATILE (clobber) = 1; - gsi_insert_after (&gsi, gimple_build_assign (arr, clobber), - GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect the new blocks. */ - find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE; - find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE; - - if (!broken_loop) - { - gimple_seq phis; - - e = find_edge (cont_bb, l3_bb); - ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE); - - phis = phi_nodes (l3_bb); - for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *phi = gsi_stmt (gsi); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne), - PHI_ARG_DEF_FROM_EDGE (phi, e)); - } - remove_edge (e); - - make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE); - e = find_edge (cont_bb, l1_bb); - if (e == NULL) - { - e = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (e->dest) == l1_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (e); - e = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (e); - e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - e->flags = EDGE_TRUE_VALUE; - if (e) - { - e->probability = REG_BR_PROB_BASE * 7 / 8; - find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8; - } - else - { - e = find_edge (cont_bb, l2_bb); - e->flags = EDGE_FALLTHRU; - } - make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE); - - if (gimple_in_ssa_p (cfun)) - { - /* Add phis to the outer loop that connect to the phis in the inner, - original loop, and move the loop entry value of the inner phi to - the loop entry value of the outer phi. */ - gphi_iterator psi; - for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi)) - { - source_location locus; - gphi *nphi; - gphi *exit_phi = psi.phi (); - - edge l2_to_l3 = find_edge (l2_bb, l3_bb); - tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3); - - basic_block latch = BRANCH_EDGE (cont_bb)->dest; - edge latch_to_l1 = find_edge (latch, l1_bb); - gphi *inner_phi - = find_phi_with_arg_on_edge (exit_res, latch_to_l1); - - tree t = gimple_phi_result (exit_phi); - tree new_res = copy_ssa_name (t, NULL); - nphi = create_phi_node (new_res, l0_bb); - - edge l0_to_l1 = find_edge (l0_bb, l1_bb); - t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1); - locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1); - edge entry_to_l0 = find_edge (entry_bb, l0_bb); - add_phi_arg (nphi, t, entry_to_l0, locus); - - edge l2_to_l0 = find_edge (l2_bb, l0_bb); - add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION); - - add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION); - }; - } - - set_immediate_dominator (CDI_DOMINATORS, l2_bb, - recompute_dominator (CDI_DOMINATORS, l2_bb)); - set_immediate_dominator (CDI_DOMINATORS, l3_bb, - recompute_dominator (CDI_DOMINATORS, l3_bb)); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, - recompute_dominator (CDI_DOMINATORS, l0_bb)); - set_immediate_dominator (CDI_DOMINATORS, l1_bb, - recompute_dominator (CDI_DOMINATORS, l1_bb)); - - /* We enter expand_omp_for_generic with a loop. This original loop may - have its own loop struct, or it may be part of an outer loop struct - (which may be the fake loop). */ - struct loop *outer_loop = entry_bb->loop_father; - bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop; - - add_bb_to_loop (l2_bb, outer_loop); - - /* We've added a new loop around the original loop. Allocate the - corresponding loop struct. */ - struct loop *new_loop = alloc_loop (); - new_loop->header = l0_bb; - new_loop->latch = l2_bb; - add_loop (new_loop, outer_loop); - - /* Allocate a loop structure for the original loop unless we already - had one. */ - if (!orig_loop_has_loop_struct - && !gimple_omp_for_combined_p (fd->for_stmt)) - { - struct loop *orig_loop = alloc_loop (); - orig_loop->header = l1_bb; - /* The loop may have multiple latches. */ - add_loop (orig_loop, new_loop); - } - } -} - - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with static schedule and no specified chunk size. Given - parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; - if (cond is <) - adj = STEP - 1; - else - adj = STEP + 1; - if ((__typeof (V)) -1 > 0 && cond is >) - n = -(adj + N2 - N1) / -STEP; - else - n = (adj + N2 - N1) / STEP; - q = n / nthreads; - tt = n % nthreads; - if (threadid < tt) goto L3; else goto L4; - L3: - tt = 0; - q = q + 1; - L4: - s0 = q * threadid + tt; - e0 = s0 + q; - V = s0 * STEP + N1; - if (s0 >= e0) goto L2; else goto L0; - L0: - e = e0 * STEP + N1; - L1: - BODY; - V += STEP; - if (V cond e) goto L1; - L2: -*/ - -static void -expand_omp_for_static_nochunk (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree n, q, s0, e0, e, t, tt, nthreads, threadid; - tree type, itype, vmain, vback; - basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb; - basic_block body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; - gimple_stmt_iterator gsi; - edge ep; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - fin_bb = BRANCH_EDGE (entry_bb)->dest; - gcc_assert (broken_loop - || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); - seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb)); - body_bb = single_succ (seq_start_bb); - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb - || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - exit_bb = region->exit; - - /* Iteration space partitioning goes in ENTRY_BB. */ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else if (gimple_omp_for_combined_into_p (fd->for_stmt)) - t = integer_one_node; - else - t = fold_binary (fd->loop.cond_code, boolean_type_node, - fold_convert (type, fd->loop.n1), - fold_convert (type, fd->loop.n2)); - if (fd->collapse == 1 - && TYPE_UNSIGNED (type) - && (t == NULL_TREE || !integer_onep (t))) - { - n1 = fold_convert (type, unshare_expr (fd->loop.n1)); - n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (type, unshare_expr (fd->loop.n2)); - n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - ep = split_block (entry_bb, cond_stmt); - ep->flags = EDGE_TRUE_VALUE; - entry_bb = ep->dest; - ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); - ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE); - ep->probability = REG_BR_PROB_BASE / 2000 - 1; - if (gimple_in_ssa_p (cfun)) - { - int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx; - for (gphi_iterator gpi = gsi_start_phis (fin_bb); - !gsi_end_p (gpi); gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), - ep, UNKNOWN_LOCATION); - } - } - gsi = gsi_last_bb (entry_bb); - } - - switch (gimple_omp_for_kind (fd->for_stmt)) - { - case GF_OMP_FOR_KIND_FOR: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - break; - case GF_OMP_FOR_KIND_DISTRIBUTE: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); - break; - default: - gcc_unreachable (); - } - nthreads = build_call_expr (nthreads, 0); - nthreads = fold_convert (itype, nthreads); - nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, - true, GSI_SAME_STMT); - threadid = build_call_expr (threadid, 0); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - step = fd->loop.step; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - - t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); - if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - t = fold_convert (itype, t); - n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - q = create_tmp_reg (itype, "q"); - t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); - gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT); - - tt = create_tmp_reg (itype, "tt"); - t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT); - gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT); - - t = build2 (LT_EXPR, boolean_type_node, threadid, tt); - gcond *cond_stmt = gimple_build_cond_empty (t); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - - second_bb = split_block (entry_bb, cond_stmt)->dest; - gsi = gsi_last_bb (second_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)), - GSI_SAME_STMT); - gassign *assign_stmt - = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1)); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - third_bb = split_block (second_bb, assign_stmt)->dest; - gsi = gsi_last_bb (third_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - t = build2 (MULT_EXPR, itype, q, threadid); - t = build2 (PLUS_EXPR, itype, t, tt); - s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - t = fold_build2 (PLUS_EXPR, itype, s0, q); - e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT); - - t = build2 (GE_EXPR, boolean_type_node, s0, e0); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - /* Setup code for sequential iteration goes in SEQ_START_BB. */ - gsi = gsi_start_bb (seq_start_bb); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL - ? gimple_omp_parallel_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST - && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - int i; - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (distribute parallel for with lastprivate), - propagate down the total number of iterations. */ - tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), - fd->loop.n2); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - } - t = fold_convert (itype, s0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (itype, e0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. */ - tree itercnt = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - if (itercnt == NULL_TREE) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - itercnt = fold_build2 (MINUS_EXPR, itype, - fold_convert (itype, n1), - fold_convert (itype, fd->loop.n1)); - itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step); - itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - else - itercnt = s0; - } - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - a = fold_build2 (MULT_EXPR, type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - dest = unshare_expr (t); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop replaces the - GIMPLE_OMP_CONTINUE. */ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - } - - /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ - gsi = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) - { - t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect all the blocks. */ - ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE); - ep->probability = REG_BR_PROB_BASE / 4 * 3; - ep = find_edge (entry_bb, second_bb); - ep->flags = EDGE_TRUE_VALUE; - ep->probability = REG_BR_PROB_BASE / 4; - find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE; - find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE; - - if (!broken_loop) - { - ep = find_edge (cont_bb, body_bb); - if (ep == NULL) - { - ep = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (ep->dest) == body_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (ep); - ep = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (ep); - ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - ep->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, fin_bb)->flags - = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - } - - set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb); - - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - - struct loop *loop = body_bb->loop_father; - if (loop != entry_bb->loop_father) - { - gcc_assert (broken_loop || loop->header == body_bb); - gcc_assert (broken_loop - || loop->latch == region->cont - || single_pred (loop->latch) == region->cont); - return; - } - - if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) - { - loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, body_bb->loop_father); - } -} - -/* Return phi in E->DEST with ARG on edge E. */ - -static gphi * -find_phi_with_arg_on_edge (tree arg, edge e) -{ - basic_block bb = e->dest; - - for (gphi_iterator gpi = gsi_start_phis (bb); - !gsi_end_p (gpi); - gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg) - return phi; - } - - return NULL; -} - -/* A subroutine of expand_omp_for. Generate code for a parallel - loop with static schedule and a specified chunk size. Given - parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2; - if (cond is <) - adj = STEP - 1; - else - adj = STEP + 1; - if ((__typeof (V)) -1 > 0 && cond is >) - n = -(adj + N2 - N1) / -STEP; - else - n = (adj + N2 - N1) / STEP; - trip = 0; - V = threadid * CHUNK * STEP + N1; -- this extra definition of V is - here so that V is defined - if the loop is not entered - L0: - s0 = (trip * nthreads + threadid) * CHUNK; - e0 = min(s0 + CHUNK, n); - if (s0 < n) goto L1; else goto L4; - L1: - V = s0 * STEP + N1; - e = e0 * STEP + N1; - L2: - BODY; - V += STEP; - if (V cond e) goto L2; else goto L3; - L3: - trip += 1; - goto L0; - L4: -*/ - -static void -expand_omp_for_static_chunk (struct omp_region *region, - struct omp_for_data *fd, gimple *inner_stmt) -{ - tree n, s0, e0, e, t; - tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid; - tree type, itype, vmain, vback, vextra; - basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb; - basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb; - gimple_stmt_iterator gsi; - edge se; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - entry_bb = region->entry; - se = split_block (entry_bb, last_stmt (entry_bb)); - entry_bb = se->src; - iter_part_bb = se->dest; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2); - fin_bb = BRANCH_EDGE (iter_part_bb)->dest; - gcc_assert (broken_loop - || fin_bb == FALLTHRU_EDGE (cont_bb)->dest); - seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb)); - body_bb = single_succ (seq_start_bb); - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb - || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb)); - } - exit_bb = region->exit; - - /* Trip and adjustment setup goes in ENTRY_BB. */ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else if (gimple_omp_for_combined_into_p (fd->for_stmt)) - t = integer_one_node; - else - t = fold_binary (fd->loop.cond_code, boolean_type_node, - fold_convert (type, fd->loop.n1), - fold_convert (type, fd->loop.n2)); - if (fd->collapse == 1 - && TYPE_UNSIGNED (type) - && (t == NULL_TREE || !integer_onep (t))) - { - n1 = fold_convert (type, unshare_expr (fd->loop.n1)); - n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE, - true, GSI_SAME_STMT); - n2 = fold_convert (type, unshare_expr (fd->loop.n2)); - n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE, - true, GSI_SAME_STMT); - gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2, - NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), - expand_omp_regimplify_p, NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - se = split_block (entry_bb, cond_stmt); - se->flags = EDGE_TRUE_VALUE; - entry_bb = se->dest; - se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1); - se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE); - se->probability = REG_BR_PROB_BASE / 2000 - 1; - if (gimple_in_ssa_p (cfun)) - { - int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx; - for (gphi_iterator gpi = gsi_start_phis (fin_bb); - !gsi_end_p (gpi); gsi_next (&gpi)) - { - gphi *phi = gpi.phi (); - add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx), - se, UNKNOWN_LOCATION); - } - } - gsi = gsi_last_bb (entry_bb); - } - - switch (gimple_omp_for_kind (fd->for_stmt)) - { - case GF_OMP_FOR_KIND_FOR: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); - break; - case GF_OMP_FOR_KIND_DISTRIBUTE: - nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS); - threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM); - break; - default: - gcc_unreachable (); - } - nthreads = build_call_expr (nthreads, 0); - nthreads = fold_convert (itype, nthreads); - nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE, - true, GSI_SAME_STMT); - threadid = build_call_expr (threadid, 0); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - step = fd->loop.step; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - tree chunk_size = fold_convert (itype, fd->chunk_size); - chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule); - chunk_size - = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true, - GSI_SAME_STMT); - - t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1)); - if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - t = fold_convert (itype, t); - n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - - trip_var = create_tmp_reg (itype, ".trip"); - if (gimple_in_ssa_p (cfun)) - { - trip_init = make_ssa_name (trip_var); - trip_main = make_ssa_name (trip_var); - trip_back = make_ssa_name (trip_var); - } - else - { - trip_init = trip_var; - trip_main = trip_var; - trip_back = trip_var; - } - - gassign *assign_stmt - = gimple_build_assign (trip_init, build_int_cst (itype, 0)); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR. */ - gsi_remove (&gsi, true); - - gimple_stmt_iterator gsif = gsi; - - /* Iteration space partitioning goes in ITER_PART_BB. */ - gsi = gsi_last_bb (iter_part_bb); - - t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads); - t = fold_build2 (PLUS_EXPR, itype, t, threadid); - t = fold_build2 (MULT_EXPR, itype, t, chunk_size); - s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size); - t = fold_build2 (MIN_EXPR, itype, t, n); - e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - - t = build2 (LT_EXPR, boolean_type_node, s0, n); - gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING); - - /* Setup code for sequential iteration goes in SEQ_START_BB. */ - gsi = gsi_start_bb (seq_start_bb); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL - ? gimple_omp_parallel_clauses (inner_stmt) - : gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST - && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE) - { - int i; - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (distribute parallel for with lastprivate), - propagate down the total number of iterations. */ - tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)), - fd->loop.n2); - t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - } - - t = fold_convert (itype, s0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (itype, e0); - t = fold_build2 (MULT_EXPR, itype, t, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (TREE_TYPE (startvar), t); - e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - /* Handle linear clause adjustments. */ - tree itercnt = NULL_TREE, itercntbias = NULL_TREE; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR) - for (tree c = gimple_omp_for_clauses (fd->for_stmt); - c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR - && !OMP_CLAUSE_LINEAR_NO_COPYIN (c)) - { - tree d = OMP_CLAUSE_DECL (c); - bool is_ref = is_reference (d); - tree t = d, a, dest; - if (is_ref) - t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t); - tree type = TREE_TYPE (t); - if (POINTER_TYPE_P (type)) - type = sizetype; - dest = unshare_expr (t); - tree v = create_tmp_var (TREE_TYPE (t), NULL); - expand_omp_build_assign (&gsif, v, t); - if (itercnt == NULL_TREE) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - itercntbias - = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1), - fold_convert (itype, fd->loop.n1)); - itercntbias = fold_build2 (EXACT_DIV_EXPR, itype, - itercntbias, step); - itercntbias - = force_gimple_operand_gsi (&gsif, itercntbias, true, - NULL_TREE, true, - GSI_SAME_STMT); - itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0); - itercnt = force_gimple_operand_gsi (&gsi, itercnt, true, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - } - else - itercnt = s0; - } - a = fold_build2 (MULT_EXPR, type, - fold_convert (type, itercnt), - fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c))); - t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR - : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop goes in CONT_BB, - replacing the GIMPLE_OMP_CONTINUE. */ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - if (DECL_P (vback) && TREE_ADDRESSABLE (vback)) - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - if (tree_int_cst_equal (fd->chunk_size, integer_one_node)) - t = build2 (EQ_EXPR, boolean_type_node, - build_int_cst (itype, 0), - build_int_cst (itype, 1)); - else - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - - /* Trip update code goes into TRIP_UPDATE_BB. */ - gsi = gsi_start_bb (trip_update_bb); - - t = build_int_cst (itype, 1); - t = build2 (PLUS_EXPR, itype, trip_main, t); - assign_stmt = gimple_build_assign (trip_back, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - - /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */ - gsi = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (gsi))) - { - t = gimple_omp_return_lhs (gsi_stmt (gsi)); - gsi_insert_after (&gsi, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&gsi, true); - - /* Connect the new blocks. */ - find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE; - find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE; - - if (!broken_loop) - { - se = find_edge (cont_bb, body_bb); - if (se == NULL) - { - se = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (se->dest) == body_bb); - } - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (se); - se = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (se); - se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - se->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, trip_update_bb)->flags - = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - - redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb); - } - - if (gimple_in_ssa_p (cfun)) - { - gphi_iterator psi; - gphi *phi; - edge re, ene; - edge_var_map *vm; - size_t i; - - gcc_assert (fd->collapse == 1 && !broken_loop); - - /* When we redirect the edge from trip_update_bb to iter_part_bb, we - remove arguments of the phi nodes in fin_bb. We need to create - appropriate phi nodes in iter_part_bb instead. */ - se = find_edge (iter_part_bb, fin_bb); - re = single_succ_edge (trip_update_bb); - vec<edge_var_map> *head = redirect_edge_var_map_vector (re); - ene = single_succ_edge (entry_bb); - - psi = gsi_start_phis (fin_bb); - for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm); - gsi_next (&psi), ++i) - { - gphi *nphi; - source_location locus; - - phi = psi.phi (); - t = gimple_phi_result (phi); - gcc_assert (t == redirect_edge_var_map_result (vm)); - - if (!single_pred_p (fin_bb)) - t = copy_ssa_name (t, phi); - - nphi = create_phi_node (t, iter_part_bb); - - t = PHI_ARG_DEF_FROM_EDGE (phi, se); - locus = gimple_phi_arg_location_from_edge (phi, se); - - /* A special case -- fd->loop.v is not yet computed in - iter_part_bb, we need to use vextra instead. */ - if (t == fd->loop.v) - t = vextra; - add_phi_arg (nphi, t, ene, locus); - locus = redirect_edge_var_map_location (vm); - tree back_arg = redirect_edge_var_map_def (vm); - add_phi_arg (nphi, back_arg, re, locus); - edge ce = find_edge (cont_bb, body_bb); - if (ce == NULL) - { - ce = BRANCH_EDGE (cont_bb); - gcc_assert (single_succ (ce->dest) == body_bb); - ce = single_succ_edge (ce->dest); - } - gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce); - gcc_assert (inner_loop_phi != NULL); - add_phi_arg (inner_loop_phi, gimple_phi_result (nphi), - find_edge (seq_start_bb, body_bb), locus); - - if (!single_pred_p (fin_bb)) - add_phi_arg (phi, gimple_phi_result (nphi), se, locus); - } - gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ())); - redirect_edge_var_map_clear (re); - if (single_pred_p (fin_bb)) - while (1) - { - psi = gsi_start_phis (fin_bb); - if (gsi_end_p (psi)) - break; - remove_phi_node (&psi, false); - } - - /* Make phi node for trip. */ - phi = create_phi_node (trip_main, iter_part_bb); - add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb), - UNKNOWN_LOCATION); - add_phi_arg (phi, trip_init, single_succ_edge (entry_bb), - UNKNOWN_LOCATION); - } - - if (!broken_loop) - set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb); - set_immediate_dominator (CDI_DOMINATORS, iter_part_bb, - recompute_dominator (CDI_DOMINATORS, iter_part_bb)); - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, - recompute_dominator (CDI_DOMINATORS, seq_start_bb)); - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - - if (!broken_loop) - { - struct loop *loop = body_bb->loop_father; - struct loop *trip_loop = alloc_loop (); - trip_loop->header = iter_part_bb; - trip_loop->latch = trip_update_bb; - add_loop (trip_loop, iter_part_bb->loop_father); - - if (loop != entry_bb->loop_father) - { - gcc_assert (loop->header == body_bb); - gcc_assert (loop->latch == region->cont - || single_pred (loop->latch) == region->cont); - trip_loop->inner = loop; - return; - } - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, trip_loop); - } - } -} - -/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop. - Given parameters: - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">" or "!=", we generate pseudocode - - for (ind_var = low; ind_var < high; ind_var++) - { - V = n1 + (ind_var * STEP) - - <BODY> - } - - In the above pseudocode, low and high are function parameters of the - child function. In the function below, we are inserting a temp. - variable that will be making a call to two OMP functions that will not be - found in the body of _Cilk_for (since OMP_FOR cannot be mixed - with _Cilk_for). These functions are replaced with low and high - by the function that handles taskreg. */ - - -static void -expand_cilk_for (struct omp_region *region, struct omp_for_data *fd) -{ - bool broken_loop = region->cont == NULL; - basic_block entry_bb = region->entry; - basic_block cont_bb = region->cont; - - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest; - basic_block l1_bb, l2_bb; - - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; - l2_bb = BRANCH_EDGE (entry_bb)->dest; - } - else - { - BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; - l1_bb = split_edge (BRANCH_EDGE (entry_bb)); - l2_bb = single_succ (l1_bb); - } - basic_block exit_bb = region->exit; - basic_block l2_dom_bb = NULL; - - gimple_stmt_iterator gsi = gsi_last_bb (entry_bb); - - /* Below statements until the "tree high_val = ..." are pseudo statements - used to pass information to be used by expand_omp_taskreg. - low_val and high_val will be replaced by the __low and __high - parameter from the child function. - - The call_exprs part is a place-holder, it is mainly used - to distinctly identify to the top-level part that this is - where we should put low and high (reasoning given in header - comment). */ - - tree child_fndecl - = gimple_omp_parallel_child_fn ( - as_a <gomp_parallel *> (last_stmt (region->outer->entry))); - tree t, low_val = NULL_TREE, high_val = NULL_TREE; - for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t)) - { - if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high")) - high_val = t; - else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low")) - low_val = t; - } - gcc_assert (low_val && high_val); - - tree type = TREE_TYPE (low_val); - tree ind_var = create_tmp_reg (type, "__cilk_ind_var"); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - /* Not needed in SSA form right now. */ - gcc_assert (!gimple_in_ssa_p (cfun)); - if (l2_dom_bb == NULL) - l2_dom_bb = l1_bb; - - tree n1 = low_val; - tree n2 = high_val; - - gimple *stmt = gimple_build_assign (ind_var, n1); - - /* Replace the GIMPLE_OMP_FOR statement. */ - gsi_replace (&gsi, stmt, true); - - if (!broken_loop) - { - /* Code to control the increment goes in the CONT_BB. */ - gsi = gsi_last_bb (cont_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); - stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var, - build_one_cst (type)); - - /* Replace GIMPLE_OMP_CONTINUE. */ - gsi_replace (&gsi, stmt, true); - } - - /* Emit the condition in L1_BB. */ - gsi = gsi_after_labels (l1_bb); - t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step), - fold_convert (TREE_TYPE (fd->loop.step), ind_var), - fd->loop.step); - if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1))) - t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1), - fd->loop.n1, fold_convert (sizetype, t)); - else - t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1), - fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t)); - t = fold_convert (TREE_TYPE (fd->loop.v), t); - expand_omp_build_assign (&gsi, fd->loop.v, t); - - /* The condition is always '<' since the runtime will fill in the low - and high values. */ - stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE); - gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); - - /* Remove GIMPLE_OMP_RETURN. */ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - /* Connect the new blocks. */ - remove_edge (FALLTHRU_EDGE (entry_bb)); - - edge e, ne; - if (!broken_loop) - { - remove_edge (BRANCH_EDGE (entry_bb)); - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); - - e = BRANCH_EDGE (l1_bb); - ne = FALLTHRU_EDGE (l1_bb); - e->flags = EDGE_TRUE_VALUE; - } - else - { - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - ne = single_succ_edge (l1_bb); - e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); - - } - ne->flags = EDGE_FALSE_VALUE; - e->probability = REG_BR_PROB_BASE * 7 / 8; - ne->probability = REG_BR_PROB_BASE / 8; - - set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); - - if (!broken_loop) - { - struct loop *loop = alloc_loop (); - loop->header = l1_bb; - loop->latch = cont_bb; - add_loop (loop, l1_bb->loop_father); - loop->safelen = INT_MAX; - } - - /* Pick the correct library function based on the precision of the - induction variable type. */ - tree lib_fun = NULL_TREE; - if (TYPE_PRECISION (type) == 32) - lib_fun = cilk_for_32_fndecl; - else if (TYPE_PRECISION (type) == 64) - lib_fun = cilk_for_64_fndecl; - else - gcc_unreachable (); - - gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR); - - /* WS_ARGS contains the library function flavor to call: - __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the - user-defined grain value. If the user does not define one, then zero - is passed in by the parser. */ - vec_alloc (region->ws_args, 2); - region->ws_args->quick_push (lib_fun); - region->ws_args->quick_push (fd->chunk_size); -} - -/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing - loop. Given parameters: - - for (V = N1; V cond N2; V += STEP) BODY; - - where COND is "<" or ">", we generate pseudocode - - V = N1; - goto L1; - L0: - BODY; - V += STEP; - L1: - if (V cond N2) goto L0; else goto L2; - L2: - - For collapsed loops, given parameters: - collapse(3) - for (V1 = N11; V1 cond1 N12; V1 += STEP1) - for (V2 = N21; V2 cond2 N22; V2 += STEP2) - for (V3 = N31; V3 cond3 N32; V3 += STEP3) - BODY; - - we generate pseudocode - - if (cond3 is <) - adj = STEP3 - 1; - else - adj = STEP3 + 1; - count3 = (adj + N32 - N31) / STEP3; - if (cond2 is <) - adj = STEP2 - 1; - else - adj = STEP2 + 1; - count2 = (adj + N22 - N21) / STEP2; - if (cond1 is <) - adj = STEP1 - 1; - else - adj = STEP1 + 1; - count1 = (adj + N12 - N11) / STEP1; - count = count1 * count2 * count3; - V = 0; - V1 = N11; - V2 = N21; - V3 = N31; - goto L1; - L0: - BODY; - V += 1; - V3 += STEP3; - V2 += (V3 cond3 N32) ? 0 : STEP2; - V3 = (V3 cond3 N32) ? V3 : N31; - V1 += (V2 cond2 N22) ? 0 : STEP1; - V2 = (V2 cond2 N22) ? V2 : N21; - L1: - if (V < count) goto L0; else goto L2; - L2: - - */ - -static void -expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) -{ - tree type, t; - basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb; - gimple_stmt_iterator gsi; - gimple *stmt; - gcond *cond_stmt; - bool broken_loop = region->cont == NULL; - edge e, ne; - tree *counts = NULL; - int i; - int safelen_int = INT_MAX; - tree safelen = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE_SAFELEN); - tree simduid = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__SIMDUID_); - tree n1, n2; - - if (safelen) - { - safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen); - if (TREE_CODE (safelen) != INTEGER_CST) - safelen_int = 0; - else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX) - safelen_int = tree_to_uhwi (safelen); - if (safelen_int == 1) - safelen_int = 0; - } - type = TREE_TYPE (fd->loop.v); - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (broken_loop - || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - l0_bb = FALLTHRU_EDGE (entry_bb)->dest; - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest; - l2_bb = BRANCH_EDGE (entry_bb)->dest; - } - else - { - BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL; - l1_bb = split_edge (BRANCH_EDGE (entry_bb)); - l2_bb = single_succ (l1_bb); - } - exit_bb = region->exit; - l2_dom_bb = NULL; - - gsi = gsi_last_bb (entry_bb); - - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - /* Not needed in SSA form right now. */ - gcc_assert (!gimple_in_ssa_p (cfun)); - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block zero_iter_bb = l2_bb, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - } - if (l2_dom_bb == NULL) - l2_dom_bb = l1_bb; - - n1 = fd->loop.n1; - n2 = fd->loop.n2; - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - } - tree step = fd->loop.step; - - bool is_simt = (safelen_int > 1 - && find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__SIMT_)); - tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE; - if (is_simt) - { - cfun->curr_properties &= ~PROP_gimple_lomp_dev; - simt_lane = create_tmp_var (unsigned_type_node); - gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0); - gimple_call_set_lhs (g, simt_lane); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, - fold_convert (TREE_TYPE (step), simt_lane)); - n1 = fold_convert (type, n1); - if (POINTER_TYPE_P (type)) - n1 = fold_build_pointer_plus (n1, offset); - else - n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset)); - - /* Collapsed loops not handled for SIMT yet: limit to one lane only. */ - if (fd->collapse > 1) - simt_maxlane = build_one_cst (unsigned_type_node); - else if (safelen_int < omp_max_simt_vf ()) - simt_maxlane = build_int_cst (unsigned_type_node, safelen_int); - tree vf - = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF, - unsigned_type_node, 0); - if (simt_maxlane) - vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane); - vf = fold_convert (TREE_TYPE (step), vf); - step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf); - } - - expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1)); - if (fd->collapse > 1) - { - if (gimple_omp_for_combined_into_p (fd->for_stmt)) - { - gsi_prev (&gsi); - expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1); - gsi_next (&gsi); - } - else - for (i = 0; i < fd->collapse; i++) - { - tree itype = TREE_TYPE (fd->loops[i].v); - if (POINTER_TYPE_P (itype)) - itype = signed_type_for (itype); - t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } - } - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi_remove (&gsi, true); - - if (!broken_loop) - { - /* Code to control the increment goes in the CONT_BB. */ - gsi = gsi_last_bb (cont_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE); - - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, step); - else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); - expand_omp_build_assign (&gsi, fd->loop.v, t); - - if (fd->collapse > 1) - { - i = fd->collapse - 1; - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))) - { - t = fold_convert (sizetype, fd->loops[i].step); - t = fold_build_pointer_plus (fd->loops[i].v, t); - } - else - { - t = fold_convert (TREE_TYPE (fd->loops[i].v), - fd->loops[i].step); - t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v), - fd->loops[i].v, t); - } - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - - for (i = fd->collapse - 1; i > 0; i--) - { - tree itype = TREE_TYPE (fd->loops[i].v); - tree itype2 = TREE_TYPE (fd->loops[i - 1].v); - if (POINTER_TYPE_P (itype2)) - itype2 = signed_type_for (itype2); - t = build3 (COND_EXPR, itype2, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - build_int_cst (itype2, 0), - fold_convert (itype2, fd->loops[i - 1].step)); - if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v))) - t = fold_build_pointer_plus (fd->loops[i - 1].v, t); - else - t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t); - expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t); - - t = build3 (COND_EXPR, itype, - build2 (fd->loops[i].cond_code, boolean_type_node, - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n2)), - fd->loops[i].v, - fold_convert (itype, fd->loops[i].n1)); - expand_omp_build_assign (&gsi, fd->loops[i].v, t); - } - } - - /* Remove GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - } - - /* Emit the condition in L1_BB. */ - gsi = gsi_start_bb (l1_bb); - - t = fold_convert (type, n2); - t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - tree v = fd->loop.v; - if (DECL_P (v) && TREE_ADDRESSABLE (v)) - v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - t = build2 (fd->loop.cond_code, boolean_type_node, v, t); - cond_stmt = gimple_build_cond_empty (t); - gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING); - if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p, - NULL, NULL) - || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p, - NULL, NULL)) - { - gsi = gsi_for_stmt (cond_stmt); - gimple_regimplify_operands (cond_stmt, &gsi); - } - - /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */ - if (is_simt) - { - gsi = gsi_start_bb (l2_bb); - step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (fd->loop.v, step); - else - t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step); - expand_omp_build_assign (&gsi, fd->loop.v, t); - } - - /* Remove GIMPLE_OMP_RETURN. */ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - /* Connect the new blocks. */ - remove_edge (FALLTHRU_EDGE (entry_bb)); - - if (!broken_loop) - { - remove_edge (BRANCH_EDGE (entry_bb)); - make_edge (entry_bb, l1_bb, EDGE_FALLTHRU); - - e = BRANCH_EDGE (l1_bb); - ne = FALLTHRU_EDGE (l1_bb); - e->flags = EDGE_TRUE_VALUE; - } - else - { - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - ne = single_succ_edge (l1_bb); - e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE); - - } - ne->flags = EDGE_FALSE_VALUE; - e->probability = REG_BR_PROB_BASE * 7 / 8; - ne->probability = REG_BR_PROB_BASE / 8; - - set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb); - set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb); - - if (simt_maxlane) - { - cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane, - NULL_TREE, NULL_TREE); - gsi = gsi_last_bb (entry_bb); - gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT); - make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE); - FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE; - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8; - BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8; - l2_dom_bb = entry_bb; - } - set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb); - - if (!broken_loop) - { - struct loop *loop = alloc_loop (); - loop->header = l1_bb; - loop->latch = cont_bb; - add_loop (loop, l1_bb->loop_father); - loop->safelen = safelen_int; - if (simduid) - { - loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid); - cfun->has_simduid_loops = true; - } - /* If not -fno-tree-loop-vectorize, hint that we want to vectorize - the loop. */ - if ((flag_tree_loop_vectorize - || (!global_options_set.x_flag_tree_loop_vectorize - && !global_options_set.x_flag_tree_vectorize)) - && flag_tree_loop_optimize - && loop->safelen > 1) - { - loop->force_vectorize = true; - cfun->has_force_vectorize_loops = true; - } - } - else if (simduid) - cfun->has_simduid_loops = true; -} - -/* Taskloop construct is represented after gimplification with - two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched - in between them. This routine expands the outer GIMPLE_OMP_FOR, - which should just compute all the needed loop temporaries - for GIMPLE_OMP_TASK. */ - -static void -expand_omp_taskloop_for_outer (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree type, bias = NULL_TREE; - basic_block entry_bb, cont_bb, exit_bb; - gimple_stmt_iterator gsi; - gassign *assign_stmt; - tree *counts = NULL; - int i; - - gcc_assert (inner_stmt); - gcc_assert (region->cont); - gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK - && gimple_omp_task_taskloop_p (inner_stmt)); - type = TREE_TYPE (fd->loop.v); - - /* See if we need to bias by LLONG_MIN. */ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type)) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest); - exit_bb = region->exit; - - gsi = gsi_last_bb (entry_bb); - gimple *for_stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR); - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - zero_iter_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - - if (zero_iter_bb) - { - /* Some counts[i] vars might be uninitialized if - some loop has zero iterations. But the body shouldn't - be executed in that case, so just avoid uninit warnings. */ - for (i = first_zero_iter; i < fd->collapse; i++) - if (SSA_VAR_P (counts[i])) - TREE_NO_WARNING (counts[i]) = 1; - gsi_prev (&gsi); - edge e = split_block (entry_bb, gsi_stmt (gsi)); - entry_bb = e->dest; - make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU); - gsi = gsi_last_bb (entry_bb); - set_immediate_dominator (CDI_DOMINATORS, entry_bb, - get_immediate_dominator (CDI_DOMINATORS, - zero_iter_bb)); - } - } - - tree t0, t1; - t1 = fd->loop.n2; - t0 = fd->loop.n1; - if (POINTER_TYPE_P (TREE_TYPE (t0)) - && TYPE_PRECISION (TREE_TYPE (t0)) - != TYPE_PRECISION (fd->iter_type)) - { - /* Avoid casting pointers to integer of a different size. */ - tree itype = signed_type_for (type); - t1 = fold_convert (fd->iter_type, fold_convert (itype, t1)); - t0 = fold_convert (fd->iter_type, fold_convert (itype, t0)); - } - else - { - t1 = fold_convert (fd->iter_type, t1); - t0 = fold_convert (fd->iter_type, t0); - } - if (bias) - { - t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias); - t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias); - } - - tree innerc = find_omp_clause (gimple_omp_task_clauses (inner_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - tree startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - tree endvar = OMP_CLAUSE_DECL (innerc); - if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST) - { - gcc_assert (innerc); - for (i = 1; i < fd->collapse; i++) - { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - if (innerc) - { - /* If needed (inner taskloop has lastprivate clause), propagate - down the total number of iterations. */ - tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false, - NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - } - - t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (startvar, t0); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false, - GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (endvar, t1); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi = gsi_for_stmt (for_stmt); - gsi_remove (&gsi, true); - - gsi = gsi_last_bb (cont_bb); - gsi_remove (&gsi, true); - - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; - remove_edge (BRANCH_EDGE (entry_bb)); - FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE; - remove_edge (BRANCH_EDGE (cont_bb)); - set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb); - set_immediate_dominator (CDI_DOMINATORS, region->entry, - recompute_dominator (CDI_DOMINATORS, region->entry)); -} - -/* Taskloop construct is represented after gimplification with - two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched - in between them. This routine expands the inner GIMPLE_OMP_FOR. - GOMP_taskloop{,_ull} function arranges for each task to be given just - a single range of iterations. */ - -static void -expand_omp_taskloop_for_inner (struct omp_region *region, - struct omp_for_data *fd, - gimple *inner_stmt) -{ - tree e, t, type, itype, vmain, vback, bias = NULL_TREE; - basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL; - basic_block fin_bb; - gimple_stmt_iterator gsi; - edge ep; - bool broken_loop = region->cont == NULL; - tree *counts = NULL; - tree n1, n2, step; - - itype = type = TREE_TYPE (fd->loop.v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - /* See if we need to bias by LLONG_MIN. */ - if (fd->iter_type == long_long_unsigned_type_node - && TREE_CODE (type) == INTEGER_TYPE - && !TYPE_UNSIGNED (type)) - { - tree n1, n2; - - if (fd->loop.cond_code == LT_EXPR) - { - n1 = fd->loop.n1; - n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step); - } - else - { - n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step); - n2 = fd->loop.n1; - } - if (TREE_CODE (n1) != INTEGER_CST - || TREE_CODE (n2) != INTEGER_CST - || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0))) - bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type)); - } - - entry_bb = region->entry; - cont_bb = region->cont; - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2); - fin_bb = BRANCH_EDGE (entry_bb)->dest; - gcc_assert (broken_loop - || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest)); - body_bb = FALLTHRU_EDGE (entry_bb)->dest; - if (!broken_loop) - { - gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb); - gcc_assert (EDGE_COUNT (cont_bb->succs) == 2); - } - exit_bb = region->exit; - - /* Iteration space partitioning goes in ENTRY_BB. */ - gsi = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR); - - if (fd->collapse > 1) - { - int first_zero_iter = -1, dummy = -1; - basic_block l2_dom_bb = NULL, dummy_bb = NULL; - - counts = XALLOCAVEC (tree, fd->collapse); - expand_omp_for_init_counts (fd, &gsi, entry_bb, counts, - fin_bb, first_zero_iter, - dummy_bb, dummy, l2_dom_bb); - t = NULL_TREE; - } - else - t = integer_one_node; - - step = fd->loop.step; - tree innerc = find_omp_clause (gimple_omp_for_clauses (fd->for_stmt), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n1 = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - n2 = OMP_CLAUSE_DECL (innerc); - if (bias) - { - n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias); - n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias); - } - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - - tree startvar = fd->loop.v; - tree endvar = NULL_TREE; - - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - tree clauses = gimple_omp_for_clauses (inner_stmt); - tree innerc = find_omp_clause (clauses, OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - startvar = OMP_CLAUSE_DECL (innerc); - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - endvar = OMP_CLAUSE_DECL (innerc); - } - t = fold_convert (TREE_TYPE (startvar), n1); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, false, GSI_CONTINUE_LINKING); - gimple *assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - - t = fold_convert (TREE_TYPE (startvar), n2); - e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, - false, GSI_CONTINUE_LINKING); - if (endvar) - { - assign_stmt = gimple_build_assign (endvar, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e))) - assign_stmt = gimple_build_assign (fd->loop.v, e); - else - assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); - } - if (fd->collapse > 1) - expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar); - - if (!broken_loop) - { - /* The code controlling the sequential loop replaces the - GIMPLE_OMP_CONTINUE. */ - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont_stmt); - vback = gimple_omp_continue_control_def (cont_stmt); - - if (!gimple_omp_for_combined_p (fd->for_stmt)) - { - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (vmain, step); - else - t = fold_build2 (PLUS_EXPR, type, vmain, step); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (vback) - && TREE_ADDRESSABLE (vback), - NULL_TREE, true, GSI_SAME_STMT); - assign_stmt = gimple_build_assign (vback, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - - t = build2 (fd->loop.cond_code, boolean_type_node, - DECL_P (vback) && TREE_ADDRESSABLE (vback) - ? t : vback, e); - gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT); - } - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi_remove (&gsi, true); - - if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt)) - collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb); - } - - /* Remove the GIMPLE_OMP_FOR statement. */ - gsi = gsi_for_stmt (fd->for_stmt); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_RETURN statement. */ - gsi = gsi_last_bb (exit_bb); - gsi_remove (&gsi, true); - - FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE; - if (!broken_loop) - remove_edge (BRANCH_EDGE (entry_bb)); - else - { - remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb)); - region->outer->cont = NULL; - } - - /* Connect all the blocks. */ - if (!broken_loop) - { - ep = find_edge (cont_bb, body_bb); - if (gimple_omp_for_combined_p (fd->for_stmt)) - { - remove_edge (ep); - ep = NULL; - } - else if (fd->collapse > 1) - { - remove_edge (ep); - ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE); - } - else - ep->flags = EDGE_TRUE_VALUE; - find_edge (cont_bb, fin_bb)->flags - = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU; - } - - set_immediate_dominator (CDI_DOMINATORS, body_bb, - recompute_dominator (CDI_DOMINATORS, body_bb)); - if (!broken_loop) - set_immediate_dominator (CDI_DOMINATORS, fin_bb, - recompute_dominator (CDI_DOMINATORS, fin_bb)); - - if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt)) - { - struct loop *loop = alloc_loop (); - loop->header = body_bb; - if (collapse_bb == NULL) - loop->latch = cont_bb; - add_loop (loop, body_bb->loop_father); - } -} - -/* A subroutine of expand_omp_for. Generate code for an OpenACC - partitioned loop. The lowering here is abstracted, in that the - loop parameters are passed through internal functions, which are - further lowered by oacc_device_lower, once we get to the target - compiler. The loop is of the form: - - for (V = B; V LTGT E; V += S) {BODY} - - where LTGT is < or >. We may have a specified chunking size, CHUNKING - (constant 0 for no chunking) and we will have a GWV partitioning - mask, specifying dimensions over which the loop is to be - partitioned (see note below). We generate code that looks like: - - <entry_bb> [incoming FALL->body, BRANCH->exit] - typedef signedintify (typeof (V)) T; // underlying signed integral type - T range = E - B; - T chunk_no = 0; - T DIR = LTGT == '<' ? +1 : -1; - T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV); - T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); - - <head_bb> [created by splitting end of entry_bb] - T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no); - T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); - if (!(offset LTGT bound)) goto bottom_bb; - - <body_bb> [incoming] - V = B + offset; - {BODY} - - <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb] - offset += step; - if (offset LTGT bound) goto body_bb; [*] - - <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb - chunk_no++; - if (chunk < chunk_max) goto head_bb; - - <exit_bb> [incoming] - V = B + ((range -/+ 1) / S +/- 1) * S [*] - - [*] Needed if V live at end of loop - - Note: CHUNKING & GWV mask are specified explicitly here. This is a - transition, and will be specified by a more general mechanism shortly. - */ - -static void -expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) -{ - tree v = fd->loop.v; - enum tree_code cond_code = fd->loop.cond_code; - enum tree_code plus_code = PLUS_EXPR; - - tree chunk_size = integer_minus_one_node; - tree gwv = integer_zero_node; - tree iter_type = TREE_TYPE (v); - tree diff_type = iter_type; - tree plus_type = iter_type; - struct oacc_collapse *counts = NULL; - - gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt) - == GF_OMP_FOR_KIND_OACC_LOOP); - gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt)); - gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR); - - if (POINTER_TYPE_P (iter_type)) - { - plus_code = POINTER_PLUS_EXPR; - plus_type = sizetype; - } - if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) - diff_type = signed_type_for (diff_type); - - basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */ - basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */ - basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */ - basic_block bottom_bb = NULL; - - /* entry_bb has two sucessors; the branch edge is to the exit - block, fallthrough edge to body. */ - gcc_assert (EDGE_COUNT (entry_bb->succs) == 2 - && BRANCH_EDGE (entry_bb)->dest == exit_bb); - - /* If cont_bb non-NULL, it has 2 successors. The branch successor is - body_bb, or to a block whose only successor is the body_bb. Its - fallthrough successor is the final block (same as the branch - successor of the entry_bb). */ - if (cont_bb) - { - basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest; - basic_block bed = BRANCH_EDGE (cont_bb)->dest; - - gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb); - gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb); - } - else - gcc_assert (!gimple_in_ssa_p (cfun)); - - /* The exit block only has entry_bb and cont_bb as predecessors. */ - gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL)); - - tree chunk_no; - tree chunk_max = NULL_TREE; - tree bound, offset; - tree step = create_tmp_var (diff_type, ".step"); - bool up = cond_code == LT_EXPR; - tree dir = build_int_cst (diff_type, up ? +1 : -1); - bool chunking = !gimple_in_ssa_p (cfun);; - bool negating; - - /* SSA instances. */ - tree offset_incr = NULL_TREE; - tree offset_init = NULL_TREE; - - gimple_stmt_iterator gsi; - gassign *ass; - gcall *call; - gimple *stmt; - tree expr; - location_t loc; - edge split, be, fte; - - /* Split the end of entry_bb to create head_bb. */ - split = split_block (entry_bb, last_stmt (entry_bb)); - basic_block head_bb = split->dest; - entry_bb = split->src; - - /* Chunk setup goes at end of entry_bb, replacing the omp_for. */ - gsi = gsi_last_bb (entry_bb); - gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi)); - loc = gimple_location (for_stmt); - - if (gimple_in_ssa_p (cfun)) - { - offset_init = gimple_omp_for_index (for_stmt, 0); - gcc_assert (integer_zerop (fd->loop.n1)); - /* The SSA parallelizer does gang parallelism. */ - gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG)); - } - - if (fd->collapse > 1) - { - counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); - tree total = expand_oacc_collapse_init (fd, &gsi, counts, - TREE_TYPE (fd->loop.n2)); - - if (SSA_VAR_P (fd->loop.n2)) - { - total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (fd->loop.n2, total); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - } - - } - - tree b = fd->loop.n1; - tree e = fd->loop.n2; - tree s = fd->loop.step; - - b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT); - e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT); - - /* Convert the step, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); - if (negating) - s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); - s = fold_convert (diff_type, s); - if (negating) - s = fold_build1 (NEGATE_EXPR, diff_type, s); - s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT); - - if (!chunking) - chunk_size = integer_zero_node; - expr = fold_convert (diff_type, chunk_size); - chunk_size = force_gimple_operand_gsi (&gsi, expr, true, - NULL_TREE, true, GSI_SAME_STMT); - /* Determine the range, avoiding possible unsigned->signed overflow. */ - negating = !up && TYPE_UNSIGNED (iter_type); - expr = fold_build2 (MINUS_EXPR, plus_type, - fold_convert (plus_type, negating ? b : e), - fold_convert (plus_type, negating ? e : b)); - expr = fold_convert (diff_type, expr); - if (negating) - expr = fold_build1 (NEGATE_EXPR, diff_type, expr); - tree range = force_gimple_operand_gsi (&gsi, expr, true, - NULL_TREE, true, GSI_SAME_STMT); - - chunk_no = build_int_cst (diff_type, 0); - if (chunking) - { - gcc_assert (!gimple_in_ssa_p (cfun)); - - expr = chunk_no; - chunk_max = create_tmp_var (diff_type, ".chunk_max"); - chunk_no = create_tmp_var (diff_type, ".chunk_no"); - - ass = gimple_build_assign (chunk_no, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_CHUNKS), - dir, range, s, chunk_size, gwv); - gimple_call_set_lhs (call, chunk_max); - gimple_set_location (call, loc); - gsi_insert_before (&gsi, call, GSI_SAME_STMT); - } - else - chunk_size = chunk_no; - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_STEP), - dir, range, s, chunk_size, gwv); - gimple_call_set_lhs (call, step); - gimple_set_location (call, loc); - gsi_insert_before (&gsi, call, GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_FOR. */ - gsi_remove (&gsi, true); - - /* Fixup edges from head_bb */ - be = BRANCH_EDGE (head_bb); - fte = FALLTHRU_EDGE (head_bb); - be->flags |= EDGE_FALSE_VALUE; - fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE; - - basic_block body_bb = fte->dest; - - if (gimple_in_ssa_p (cfun)) - { - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - - offset = gimple_omp_continue_control_use (cont_stmt); - offset_incr = gimple_omp_continue_control_def (cont_stmt); - } - else - { - offset = create_tmp_var (diff_type, ".offset"); - offset_init = offset_incr = offset; - } - bound = create_tmp_var (TREE_TYPE (offset), ".bound"); - - /* Loop offset & bound go into head_bb. */ - gsi = gsi_start_bb (head_bb); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_OFFSET), - dir, range, s, - chunk_size, gwv, chunk_no); - gimple_call_set_lhs (call, offset_init); - gimple_set_location (call, loc); - gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); - - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, - build_int_cst (integer_type_node, - IFN_GOACC_LOOP_BOUND), - dir, range, s, - chunk_size, gwv, offset_init); - gimple_call_set_lhs (call, bound); - gimple_set_location (call, loc); - gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); - - expr = build2 (cond_code, boolean_type_node, offset_init, bound); - gsi_insert_after (&gsi, gimple_build_cond_empty (expr), - GSI_CONTINUE_LINKING); - - /* V assignment goes into body_bb. */ - if (!gimple_in_ssa_p (cfun)) - { - gsi = gsi_start_bb (body_bb); - - expr = build2 (plus_code, iter_type, b, - fold_convert (plus_type, offset)); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (v, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - if (fd->collapse > 1) - expand_oacc_collapse_vars (fd, &gsi, counts, v); - } - - /* Loop increment goes into cont_bb. If this is not a loop, we - will have spawned threads as if it was, and each one will - execute one iteration. The specification is not explicit about - whether such constructs are ill-formed or not, and they can - occur, especially when noreturn routines are involved. */ - if (cont_bb) - { - gsi = gsi_last_bb (cont_bb); - gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi)); - loc = gimple_location (cont_stmt); - - /* Increment offset. */ - if (gimple_in_ssa_p (cfun)) - expr= build2 (plus_code, iter_type, offset, - fold_convert (plus_type, step)); - else - expr = build2 (PLUS_EXPR, diff_type, offset, step); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (offset_incr, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - expr = build2 (cond_code, boolean_type_node, offset_incr, bound); - gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT); - - /* Remove the GIMPLE_OMP_CONTINUE. */ - gsi_remove (&gsi, true); - - /* Fixup edges from cont_bb */ - be = BRANCH_EDGE (cont_bb); - fte = FALLTHRU_EDGE (cont_bb); - be->flags |= EDGE_TRUE_VALUE; - fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; - - if (chunking) - { - /* Split the beginning of exit_bb to make bottom_bb. We - need to insert a nop at the start, because splitting is - after a stmt, not before. */ - gsi = gsi_start_bb (exit_bb); - stmt = gimple_build_nop (); - gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); - split = split_block (exit_bb, stmt); - bottom_bb = split->src; - exit_bb = split->dest; - gsi = gsi_last_bb (bottom_bb); - - /* Chunk increment and test goes into bottom_bb. */ - expr = build2 (PLUS_EXPR, diff_type, chunk_no, - build_int_cst (diff_type, 1)); - ass = gimple_build_assign (chunk_no, expr); - gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING); - - /* Chunk test at end of bottom_bb. */ - expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max); - gsi_insert_after (&gsi, gimple_build_cond_empty (expr), - GSI_CONTINUE_LINKING); - - /* Fixup edges from bottom_bb. */ - split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE; - make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE); - } - } - - gsi = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - loc = gimple_location (gsi_stmt (gsi)); - - if (!gimple_in_ssa_p (cfun)) - { - /* Insert the final value of V, in case it is live. This is the - value for the only thread that survives past the join. */ - expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); - expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); - expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); - expr = fold_build2 (MULT_EXPR, diff_type, expr, s); - expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr)); - expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE, - true, GSI_SAME_STMT); - ass = gimple_build_assign (v, expr); - gsi_insert_before (&gsi, ass, GSI_SAME_STMT); - } - - /* Remove the OMP_RETURN. */ - gsi_remove (&gsi, true); - - if (cont_bb) - { - /* We now have one or two nested loops. Update the loop - structures. */ - struct loop *parent = entry_bb->loop_father; - struct loop *body = body_bb->loop_father; - - if (chunking) - { - struct loop *chunk_loop = alloc_loop (); - chunk_loop->header = head_bb; - chunk_loop->latch = bottom_bb; - add_loop (chunk_loop, parent); - parent = chunk_loop; - } - else if (parent != body) - { - gcc_assert (body->header == body_bb); - gcc_assert (body->latch == cont_bb - || single_pred (body->latch) == cont_bb); - parent = NULL; - } - - if (parent) - { - struct loop *body_loop = alloc_loop (); - body_loop->header = body_bb; - body_loop->latch = cont_bb; - add_loop (body_loop, parent); - } - } -} - -/* Expand the OMP loop defined by REGION. */ - -static void -expand_omp_for (struct omp_region *region, gimple *inner_stmt) -{ - struct omp_for_data fd; - struct omp_for_data_loop *loops; - - loops - = (struct omp_for_data_loop *) - alloca (gimple_omp_for_collapse (last_stmt (region->entry)) - * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (as_a <gomp_for *> (last_stmt (region->entry)), - &fd, loops); - region->sched_kind = fd.sched_kind; - region->sched_modifiers = fd.sched_modifiers; - - gcc_assert (EDGE_COUNT (region->entry->succs) == 2); - BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; - FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL; - if (region->cont) - { - gcc_assert (EDGE_COUNT (region->cont->succs) == 2); - BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; - FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL; - } - else - /* If there isn't a continue then this is a degerate case where - the introduction of abnormal edges during lowering will prevent - original loops from being detected. Fix that up. */ - loops_state_set (LOOPS_NEED_FIXUP); - - if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD) - expand_omp_simd (region, &fd); - else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR) - expand_cilk_for (region, &fd); - else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP) - { - gcc_assert (!inner_stmt); - expand_oacc_for (region, &fd); - } - else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP) - { - if (gimple_omp_for_combined_into_p (fd.for_stmt)) - expand_omp_taskloop_for_inner (region, &fd, inner_stmt); - else - expand_omp_taskloop_for_outer (region, &fd, inner_stmt); - } - else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC - && !fd.have_ordered) - { - if (fd.chunk_size == NULL) - expand_omp_for_static_nochunk (region, &fd, inner_stmt); - else - expand_omp_for_static_chunk (region, &fd, inner_stmt); - } - else - { - int fn_index, start_ix, next_ix; - - gcc_assert (gimple_omp_for_kind (fd.for_stmt) - == GF_OMP_FOR_KIND_FOR); - if (fd.chunk_size == NULL - && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC) - fd.chunk_size = integer_zero_node; - gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); - switch (fd.sched_kind) - { - case OMP_CLAUSE_SCHEDULE_RUNTIME: - fn_index = 3; - break; - case OMP_CLAUSE_SCHEDULE_DYNAMIC: - case OMP_CLAUSE_SCHEDULE_GUIDED: - if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) - && !fd.ordered - && !fd.have_ordered) - { - fn_index = 3 + fd.sched_kind; - break; - } - /* FALLTHRU */ - default: - fn_index = fd.sched_kind; - break; - } - if (!fd.ordered) - fn_index += fd.have_ordered * 6; - if (fd.ordered) - start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index; - else - start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index; - next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index; - if (fd.iter_type == long_long_unsigned_type_node) - { - start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START - - (int)BUILT_IN_GOMP_LOOP_STATIC_START); - next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT - - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT); - } - expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix, - (enum built_in_function) next_ix, inner_stmt); - } - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_only_virtuals); -} - - -/* Expand code for an OpenMP sections directive. In pseudo code, we generate - - v = GOMP_sections_start (n); - L0: - switch (v) - { - case 0: - goto L2; - case 1: - section 1; - goto L1; - case 2: - ... - case n: - ... - default: - abort (); - } - L1: - v = GOMP_sections_next (); - goto L0; - L2: - reduction; - - If this is a combined parallel sections, replace the call to - GOMP_sections_start with call to GOMP_sections_next. */ - -static void -expand_omp_sections (struct omp_region *region) -{ - tree t, u, vin = NULL, vmain, vnext, l2; - unsigned len; - basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb; - gimple_stmt_iterator si, switch_si; - gomp_sections *sections_stmt; - gimple *stmt; - gomp_continue *cont; - edge_iterator ei; - edge e; - struct omp_region *inner; - unsigned i, casei; - bool exit_reachable = region->cont != NULL; - - gcc_assert (region->exit != NULL); - entry_bb = region->entry; - l0_bb = single_succ (entry_bb); - l1_bb = region->cont; - l2_bb = region->exit; - if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb) - l2 = gimple_block_label (l2_bb); - else - { - /* This can happen if there are reductions. */ - len = EDGE_COUNT (l0_bb->succs); - gcc_assert (len > 0); - e = EDGE_SUCC (l0_bb, len - 1); - si = gsi_last_bb (e->dest); - l2 = NULL_TREE; - if (gsi_end_p (si) - || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) - l2 = gimple_block_label (e->dest); - else - FOR_EACH_EDGE (e, ei, l0_bb->succs) - { - si = gsi_last_bb (e->dest); - if (gsi_end_p (si) - || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION) - { - l2 = gimple_block_label (e->dest); - break; - } - } - } - if (exit_reachable) - default_bb = create_empty_bb (l1_bb->prev_bb); - else - default_bb = create_empty_bb (l0_bb); - - /* We will build a switch() with enough cases for all the - GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work - and a default case to abort if something goes wrong. */ - len = EDGE_COUNT (l0_bb->succs); - - /* Use vec::quick_push on label_vec throughout, since we know the size - in advance. */ - auto_vec<tree> label_vec (len); - - /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the - GIMPLE_OMP_SECTIONS statement. */ - si = gsi_last_bb (entry_bb); - sections_stmt = as_a <gomp_sections *> (gsi_stmt (si)); - gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS); - vin = gimple_omp_sections_control (sections_stmt); - if (!is_combined_parallel (region)) - { - /* If we are not inside a combined parallel+sections region, - call GOMP_sections_start. */ - t = build_int_cst (unsigned_type_node, len - 1); - u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START); - stmt = gimple_build_call (u, 1, t); - } - else - { - /* Otherwise, call GOMP_sections_next. */ - u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); - stmt = gimple_build_call (u, 0); - } - gimple_call_set_lhs (stmt, vin); - gsi_insert_after (&si, stmt, GSI_SAME_STMT); - gsi_remove (&si, true); - - /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in - L0_BB. */ - switch_si = gsi_last_bb (l0_bb); - gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH); - if (exit_reachable) - { - cont = as_a <gomp_continue *> (last_stmt (l1_bb)); - gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE); - vmain = gimple_omp_continue_control_use (cont); - vnext = gimple_omp_continue_control_def (cont); - } - else - { - vmain = vin; - vnext = NULL_TREE; - } - - t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2); - label_vec.quick_push (t); - i = 1; - - /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */ - for (inner = region->inner, casei = 1; - inner; - inner = inner->next, i++, casei++) - { - basic_block s_entry_bb, s_exit_bb; - - /* Skip optional reduction region. */ - if (inner->type == GIMPLE_OMP_ATOMIC_LOAD) - { - --i; - --casei; - continue; - } - - s_entry_bb = inner->entry; - s_exit_bb = inner->exit; - - t = gimple_block_label (s_entry_bb); - u = build_int_cst (unsigned_type_node, casei); - u = build_case_label (u, NULL, t); - label_vec.quick_push (u); - - si = gsi_last_bb (s_entry_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION); - gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si))); - gsi_remove (&si, true); - single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU; - - if (s_exit_bb == NULL) - continue; - - si = gsi_last_bb (s_exit_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); - gsi_remove (&si, true); - - single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU; - } - - /* Error handling code goes in DEFAULT_BB. */ - t = gimple_block_label (default_bb); - u = build_case_label (NULL, NULL, t); - make_edge (l0_bb, default_bb, 0); - add_bb_to_loop (default_bb, current_loops->tree_root); - - stmt = gimple_build_switch (vmain, u, label_vec); - gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT); - gsi_remove (&switch_si, true); - - si = gsi_start_bb (default_bb); - stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0); - gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING); - - if (exit_reachable) - { - tree bfn_decl; - - /* Code to get the next section goes in L1_BB. */ - si = gsi_last_bb (l1_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE); - - bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT); - stmt = gimple_build_call (bfn_decl, 0); - gimple_call_set_lhs (stmt, vnext); - gsi_insert_after (&si, stmt, GSI_SAME_STMT); - gsi_remove (&si, true); - - single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU; - } - - /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */ - si = gsi_last_bb (l2_bb); - if (gimple_omp_return_nowait_p (gsi_stmt (si))) - t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT); - else if (gimple_omp_return_lhs (gsi_stmt (si))) - t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL); - else - t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END); - stmt = gimple_build_call (t, 0); - if (gimple_omp_return_lhs (gsi_stmt (si))) - gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si))); - gsi_insert_after (&si, stmt, GSI_SAME_STMT); - gsi_remove (&si, true); - - set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb); -} - - -/* Expand code for an OpenMP single directive. We've already expanded - much of the code, here we simply place the GOMP_barrier call. */ - -static void -expand_omp_single (struct omp_region *region) -{ - basic_block entry_bb, exit_bb; - gimple_stmt_iterator si; - - entry_bb = region->entry; - exit_bb = region->exit; - - si = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE); - gsi_remove (&si, true); - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - si = gsi_last_bb (exit_bb); - if (!gimple_omp_return_nowait_p (gsi_stmt (si))) - { - tree t = gimple_omp_return_lhs (gsi_stmt (si)); - gsi_insert_after (&si, build_omp_barrier (t), GSI_SAME_STMT); - } - gsi_remove (&si, true); - single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; -} - - -/* Generic expansion for OpenMP synchronization directives: master, - ordered and critical. All we need to do here is remove the entry - and exit markers for REGION. */ - -static void -expand_omp_synch (struct omp_region *region) -{ - basic_block entry_bb, exit_bb; - gimple_stmt_iterator si; - - entry_bb = region->entry; - exit_bb = region->exit; - - si = gsi_last_bb (entry_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL - || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS); - gsi_remove (&si, true); - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - if (exit_bb) - { - si = gsi_last_bb (exit_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN); - gsi_remove (&si, true); - single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU; - } -} - -/* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a normal volatile load. */ - -static bool -expand_omp_atomic_load (basic_block load_bb, tree addr, - tree loaded_val, int index) -{ - enum built_in_function tmpbase; - gimple_stmt_iterator gsi; - basic_block store_bb; - location_t loc; - gimple *stmt; - tree decl, call, type, itype; - - gsi = gsi_last_bb (load_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); - loc = gimple_location (stmt); - - /* ??? If the target does not implement atomic_load_optab[mode], and mode - is smaller than word size, then expand_atomic_load assumes that the load - is atomic. We could avoid the builtin entirely in this case. */ - - tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); - decl = builtin_decl_explicit (tmpbase); - if (decl == NULL_TREE) - return false; - - type = TREE_TYPE (loaded_val); - itype = TREE_TYPE (TREE_TYPE (decl)); - - call = build_call_expr_loc (loc, decl, 2, addr, - build_int_cst (NULL, - gimple_omp_atomic_seq_cst_p (stmt) - ? MEMMODEL_SEQ_CST - : MEMMODEL_RELAXED)); - if (!useless_type_conversion_p (type, itype)) - call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call); - call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); - - force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - store_bb = single_succ (load_bb); - gsi = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); - gsi_remove (&gsi, true); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - - return true; -} - -/* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a normal volatile store. */ - -static bool -expand_omp_atomic_store (basic_block load_bb, tree addr, - tree loaded_val, tree stored_val, int index) -{ - enum built_in_function tmpbase; - gimple_stmt_iterator gsi; - basic_block store_bb = single_succ (load_bb); - location_t loc; - gimple *stmt; - tree decl, call, type, itype; - machine_mode imode; - bool exchange; - - gsi = gsi_last_bb (load_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD); - - /* If the load value is needed, then this isn't a store but an exchange. */ - exchange = gimple_omp_atomic_need_value_p (stmt); - - gsi = gsi_last_bb (store_bb); - stmt = gsi_stmt (gsi); - gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE); - loc = gimple_location (stmt); - - /* ??? If the target does not implement atomic_store_optab[mode], and mode - is smaller than word size, then expand_atomic_store assumes that the store - is atomic. We could avoid the builtin entirely in this case. */ - - tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N); - tmpbase = (enum built_in_function) ((int) tmpbase + index + 1); - decl = builtin_decl_explicit (tmpbase); - if (decl == NULL_TREE) - return false; - - type = TREE_TYPE (stored_val); - - /* Dig out the type of the function's second argument. */ - itype = TREE_TYPE (decl); - itype = TYPE_ARG_TYPES (itype); - itype = TREE_CHAIN (itype); - itype = TREE_VALUE (itype); - imode = TYPE_MODE (itype); - - if (exchange && !can_atomic_exchange_p (imode, true)) - return false; - - if (!useless_type_conversion_p (itype, type)) - stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val); - call = build_call_expr_loc (loc, decl, 3, addr, stored_val, - build_int_cst (NULL, - gimple_omp_atomic_seq_cst_p (stmt) - ? MEMMODEL_SEQ_CST - : MEMMODEL_RELAXED)); - if (exchange) - { - if (!useless_type_conversion_p (type, itype)) - call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call); - call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call); - } - - force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */ - gsi = gsi_last_bb (load_bb); - gsi_remove (&gsi, true); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - - return true; -} - -/* A subroutine of expand_omp_atomic. Attempt to implement the atomic - operation as a __atomic_fetch_op builtin. INDEX is log2 of the - size of the data type, and thus usable to find the index of the builtin - decl. Returns false if the expression is not of the proper form. */ - -static bool -expand_omp_atomic_fetch_op (basic_block load_bb, - tree addr, tree loaded_val, - tree stored_val, int index) -{ - enum built_in_function oldbase, newbase, tmpbase; - tree decl, itype, call; - tree lhs, rhs; - basic_block store_bb = single_succ (load_bb); - gimple_stmt_iterator gsi; - gimple *stmt; - location_t loc; - enum tree_code code; - bool need_old, need_new; - machine_mode imode; - bool seq_cst; - - /* We expect to find the following sequences: - - load_bb: - GIMPLE_OMP_ATOMIC_LOAD (tmp, mem) - - store_bb: - val = tmp OP something; (or: something OP tmp) - GIMPLE_OMP_STORE (val) - - ???FIXME: Allow a more flexible sequence. - Perhaps use data flow to pick the statements. - - */ - - gsi = gsi_after_labels (store_bb); - stmt = gsi_stmt (gsi); - loc = gimple_location (stmt); - if (!is_gimple_assign (stmt)) - return false; - gsi_next (&gsi); - if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE) - return false; - need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi)); - need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb)); - seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb)); - gcc_checking_assert (!need_old || !need_new); - - if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0)) - return false; - - /* Check for one of the supported fetch-op operations. */ - code = gimple_assign_rhs_code (stmt); - switch (code) - { - case PLUS_EXPR: - case POINTER_PLUS_EXPR: - oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N; - newbase = BUILT_IN_ATOMIC_ADD_FETCH_N; - break; - case MINUS_EXPR: - oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N; - newbase = BUILT_IN_ATOMIC_SUB_FETCH_N; - break; - case BIT_AND_EXPR: - oldbase = BUILT_IN_ATOMIC_FETCH_AND_N; - newbase = BUILT_IN_ATOMIC_AND_FETCH_N; - break; - case BIT_IOR_EXPR: - oldbase = BUILT_IN_ATOMIC_FETCH_OR_N; - newbase = BUILT_IN_ATOMIC_OR_FETCH_N; - break; - case BIT_XOR_EXPR: - oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N; - newbase = BUILT_IN_ATOMIC_XOR_FETCH_N; - break; - default: - return false; - } - - /* Make sure the expression is of the proper form. */ - if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0)) - rhs = gimple_assign_rhs2 (stmt); - else if (commutative_tree_code (gimple_assign_rhs_code (stmt)) - && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0)) - rhs = gimple_assign_rhs1 (stmt); - else - return false; - - tmpbase = ((enum built_in_function) - ((need_new ? newbase : oldbase) + index + 1)); - decl = builtin_decl_explicit (tmpbase); - if (decl == NULL_TREE) - return false; - itype = TREE_TYPE (TREE_TYPE (decl)); - imode = TYPE_MODE (itype); - - /* We could test all of the various optabs involved, but the fact of the - matter is that (with the exception of i486 vs i586 and xadd) all targets - that support any atomic operaton optab also implements compare-and-swap. - Let optabs.c take care of expanding any compare-and-swap loop. */ - if (!can_compare_and_swap_p (imode, true)) - return false; - - gsi = gsi_last_bb (load_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD); - - /* OpenMP does not imply any barrier-like semantics on its atomic ops. - It only requires that the operation happen atomically. Thus we can - use the RELAXED memory model. */ - call = build_call_expr_loc (loc, decl, 3, addr, - fold_convert_loc (loc, itype, rhs), - build_int_cst (NULL, - seq_cst ? MEMMODEL_SEQ_CST - : MEMMODEL_RELAXED)); - - if (need_old || need_new) - { - lhs = need_old ? loaded_val : stored_val; - call = fold_convert_loc (loc, TREE_TYPE (lhs), call); - call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call); - } - else - call = fold_convert_loc (loc, void_type_node, call); - force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - gsi = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE); - gsi_remove (&gsi, true); - gsi = gsi_last_bb (store_bb); - stmt = gsi_stmt (gsi); - gsi_remove (&gsi, true); - - if (gimple_in_ssa_p (cfun)) - { - release_defs (stmt); - update_ssa (TODO_update_ssa_no_phi); - } - - return true; -} - -/* A subroutine of expand_omp_atomic. Implement the atomic operation as: - - oldval = *addr; - repeat: - newval = rhs; // with oldval replacing *addr in rhs - oldval = __sync_val_compare_and_swap (addr, oldval, newval); - if (oldval != newval) - goto repeat; - - INDEX is log2 of the size of the data type, and thus usable to find the - index of the builtin decl. */ - -static bool -expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb, - tree addr, tree loaded_val, tree stored_val, - int index) -{ - tree loadedi, storedi, initial, new_storedi, old_vali; - tree type, itype, cmpxchg, iaddr; - gimple_stmt_iterator si; - basic_block loop_header = single_succ (load_bb); - gimple *phi, *stmt; - edge e; - enum built_in_function fncode; - - /* ??? We need a non-pointer interface to __atomic_compare_exchange in - order to use the RELAXED memory model effectively. */ - fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N - + index + 1); - cmpxchg = builtin_decl_explicit (fncode); - if (cmpxchg == NULL_TREE) - return false; - type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); - itype = TREE_TYPE (TREE_TYPE (cmpxchg)); - - if (!can_compare_and_swap_p (TYPE_MODE (itype), true)) - return false; - - /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */ - si = gsi_last_bb (load_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); - - /* For floating-point values, we'll need to view-convert them to integers - so that we can perform the atomic compare and swap. Simplify the - following code by always setting up the "i"ntegral variables. */ - if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type)) - { - tree iaddr_val; - - iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode, - true)); - iaddr_val - = force_gimple_operand_gsi (&si, - fold_convert (TREE_TYPE (iaddr), addr), - false, NULL_TREE, true, GSI_SAME_STMT); - stmt = gimple_build_assign (iaddr, iaddr_val); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - loadedi = create_tmp_var (itype); - if (gimple_in_ssa_p (cfun)) - loadedi = make_ssa_name (loadedi); - } - else - { - iaddr = addr; - loadedi = loaded_val; - } - - fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1); - tree loaddecl = builtin_decl_explicit (fncode); - if (loaddecl) - initial - = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)), - build_call_expr (loaddecl, 2, iaddr, - build_int_cst (NULL_TREE, - MEMMODEL_RELAXED))); - else - initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr, - build_int_cst (TREE_TYPE (iaddr), 0)); - - initial - = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true, - GSI_SAME_STMT); - - /* Move the value to the LOADEDI temporary. */ - if (gimple_in_ssa_p (cfun)) - { - gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header))); - phi = create_phi_node (loadedi, loop_header); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)), - initial); - } - else - gsi_insert_before (&si, - gimple_build_assign (loadedi, initial), - GSI_SAME_STMT); - if (loadedi != loaded_val) - { - gimple_stmt_iterator gsi2; - tree x; - - x = build1 (VIEW_CONVERT_EXPR, type, loadedi); - gsi2 = gsi_start_bb (loop_header); - if (gimple_in_ssa_p (cfun)) - { - gassign *stmt; - x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, - true, GSI_SAME_STMT); - stmt = gimple_build_assign (loaded_val, x); - gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT); - } - else - { - x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x); - force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE, - true, GSI_SAME_STMT); - } - } - gsi_remove (&si, true); - - si = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); - - if (iaddr == addr) - storedi = stored_val; - else - storedi = - force_gimple_operand_gsi (&si, - build1 (VIEW_CONVERT_EXPR, itype, - stored_val), true, NULL_TREE, true, - GSI_SAME_STMT); - - /* Build the compare&swap statement. */ - new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi); - new_storedi = force_gimple_operand_gsi (&si, - fold_convert (TREE_TYPE (loadedi), - new_storedi), - true, NULL_TREE, - true, GSI_SAME_STMT); - - if (gimple_in_ssa_p (cfun)) - old_vali = loadedi; - else - { - old_vali = create_tmp_var (TREE_TYPE (loadedi)); - stmt = gimple_build_assign (old_vali, loadedi); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - stmt = gimple_build_assign (loadedi, new_storedi); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - } - - /* Note that we always perform the comparison as an integer, even for - floating point. This allows the atomic operation to properly - succeed even with NaNs and -0.0. */ - stmt = gimple_build_cond_empty - (build2 (NE_EXPR, boolean_type_node, - new_storedi, old_vali)); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - /* Update cfg. */ - e = single_succ_edge (store_bb); - e->flags &= ~EDGE_FALLTHRU; - e->flags |= EDGE_FALSE_VALUE; - - e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE); - - /* Copy the new value to loadedi (we already did that before the condition - if we are not in SSA). */ - if (gimple_in_ssa_p (cfun)) - { - phi = gimple_seq_first_stmt (phi_nodes (loop_header)); - SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi); - } - - /* Remove GIMPLE_OMP_ATOMIC_STORE. */ - gsi_remove (&si, true); - - struct loop *loop = alloc_loop (); - loop->header = loop_header; - loop->latch = store_bb; - add_loop (loop, loop_header->loop_father); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - - return true; -} - -/* A subroutine of expand_omp_atomic. Implement the atomic operation as: - - GOMP_atomic_start (); - *addr = rhs; - GOMP_atomic_end (); - - The result is not globally atomic, but works so long as all parallel - references are within #pragma omp atomic directives. According to - responses received from omp@openmp.org, appears to be within spec. - Which makes sense, since that's how several other compilers handle - this situation as well. - LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're - expanding. STORED_VAL is the operand of the matching - GIMPLE_OMP_ATOMIC_STORE. - - We replace - GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with - loaded_val = *addr; - - and replace - GIMPLE_OMP_ATOMIC_STORE (stored_val) with - *addr = stored_val; -*/ - -static bool -expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb, - tree addr, tree loaded_val, tree stored_val) -{ - gimple_stmt_iterator si; - gassign *stmt; - tree t; - - si = gsi_last_bb (load_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD); - - t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START); - t = build_call_expr (t, 0); - force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); - - stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr)); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - gsi_remove (&si, true); - - si = gsi_last_bb (store_bb); - gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE); - - stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)), - stored_val); - gsi_insert_before (&si, stmt, GSI_SAME_STMT); - - t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END); - t = build_call_expr (t, 0); - force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT); - gsi_remove (&si, true); - - if (gimple_in_ssa_p (cfun)) - update_ssa (TODO_update_ssa_no_phi); - return true; -} - -/* Expand an GIMPLE_OMP_ATOMIC statement. We try to expand - using expand_omp_atomic_fetch_op. If it failed, we try to - call expand_omp_atomic_pipeline, and if it fails too, the - ultimate fallback is wrapping the operation in a mutex - (expand_omp_atomic_mutex). REGION is the atomic region built - by build_omp_regions_1(). */ - -static void -expand_omp_atomic (struct omp_region *region) -{ - basic_block load_bb = region->entry, store_bb = region->exit; - gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb)); - gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb)); - tree loaded_val = gimple_omp_atomic_load_lhs (load); - tree addr = gimple_omp_atomic_load_rhs (load); - tree stored_val = gimple_omp_atomic_store_val (store); - tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr))); - HOST_WIDE_INT index; - - /* Make sure the type is one of the supported sizes. */ - index = tree_to_uhwi (TYPE_SIZE_UNIT (type)); - index = exact_log2 (index); - if (index >= 0 && index <= 4) - { - unsigned int align = TYPE_ALIGN_UNIT (type); - - /* __sync builtins require strict data alignment. */ - if (exact_log2 (align) >= index) - { - /* Atomic load. */ - if (loaded_val == stored_val - && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT - || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) - && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD - && expand_omp_atomic_load (load_bb, addr, loaded_val, index)) - return; - - /* Atomic store. */ - if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT - || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT) - && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD - && store_bb == single_succ (load_bb) - && first_stmt (store_bb) == store - && expand_omp_atomic_store (load_bb, addr, loaded_val, - stored_val, index)) - return; - - /* When possible, use specialized atomic update functions. */ - if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type)) - && store_bb == single_succ (load_bb) - && expand_omp_atomic_fetch_op (load_bb, addr, - loaded_val, stored_val, index)) - return; - - /* If we don't have specialized __sync builtins, try and implement - as a compare and swap loop. */ - if (expand_omp_atomic_pipeline (load_bb, store_bb, addr, - loaded_val, stored_val, index)) - return; - } - } - - /* The ultimate fallback is wrapping the operation in a mutex. */ - expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val); -} - - -/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK - macro on gomp-constants.h. We do not check for overflow. */ - -static tree -oacc_launch_pack (unsigned code, tree device, unsigned op) -{ - tree res; - - res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op)); - if (device) - { - device = fold_build2 (LSHIFT_EXPR, unsigned_type_node, - device, build_int_cst (unsigned_type_node, - GOMP_LAUNCH_DEVICE_SHIFT)); - res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device); - } - return res; -} - -/* Look for compute grid dimension clauses and convert to an attribute - attached to FN. This permits the target-side code to (a) massage - the dimensions, (b) emit that data and (c) optimize. Non-constant - dimensions are pushed onto ARGS. - - The attribute value is a TREE_LIST. A set of dimensions is - represented as a list of INTEGER_CST. Those that are runtime - exprs are represented as an INTEGER_CST of zero. - - TOOO. Normally the attribute will just contain a single such list. If - however it contains a list of lists, this will represent the use of - device_type. Each member of the outer list is an assoc list of - dimensions, keyed by the device type. The first entry will be the - default. Well, that's the plan. */ - -#define OACC_FN_ATTRIB "oacc function" - -/* Replace any existing oacc fn attribute with updated dimensions. */ - -void -replace_oacc_fn_attrib (tree fn, tree dims) -{ - tree ident = get_identifier (OACC_FN_ATTRIB); - tree attribs = DECL_ATTRIBUTES (fn); - - /* If we happen to be present as the first attrib, drop it. */ - if (attribs && TREE_PURPOSE (attribs) == ident) - attribs = TREE_CHAIN (attribs); - DECL_ATTRIBUTES (fn) = tree_cons (ident, dims, attribs); -} - -/* Scan CLAUSES for launch dimensions and attach them to the oacc - function attribute. Push any that are non-constant onto the ARGS - list, along with an appropriate GOMP_LAUNCH_DIM tag. IS_KERNEL is - true, if these are for a kernels region offload function. */ - -void -set_oacc_fn_attrib (tree fn, tree clauses, bool is_kernel, vec<tree> *args) -{ - /* Must match GOMP_DIM ordering. */ - static const omp_clause_code ids[] - = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS, - OMP_CLAUSE_VECTOR_LENGTH }; - unsigned ix; - tree dims[GOMP_DIM_MAX]; - tree attr = NULL_TREE; - unsigned non_const = 0; - - for (ix = GOMP_DIM_MAX; ix--;) - { - tree clause = find_omp_clause (clauses, ids[ix]); - tree dim = NULL_TREE; - - if (clause) - dim = OMP_CLAUSE_EXPR (clause, ids[ix]); - dims[ix] = dim; - if (dim && TREE_CODE (dim) != INTEGER_CST) - { - dim = integer_zero_node; - non_const |= GOMP_DIM_MASK (ix); - } - attr = tree_cons (NULL_TREE, dim, attr); - /* Note kernelness with TREE_PUBLIC. */ - if (is_kernel) - TREE_PUBLIC (attr) = 1; - } - - replace_oacc_fn_attrib (fn, attr); - - if (non_const) - { - /* Push a dynamic argument set. */ - args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM, - NULL_TREE, non_const)); - for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) - if (non_const & GOMP_DIM_MASK (ix)) - args->safe_push (dims[ix]); - } -} - -/* Process the routine's dimension clauess to generate an attribute - value. Issue diagnostics as appropriate. We default to SEQ - (OpenACC 2.5 clarifies this). All dimensions have a size of zero - (dynamic). TREE_PURPOSE is set to indicate whether that dimension - can have a loop partitioned on it. non-zero indicates - yes, zero indicates no. By construction once a non-zero has been - reached, further inner dimensions must also be non-zero. We set - TREE_VALUE to zero for the dimensions that may be partitioned and - 1 for the other ones -- if a loop is (erroneously) spawned at - an outer level, we don't want to try and partition it. */ - -tree -build_oacc_routine_dims (tree clauses) -{ - /* Must match GOMP_DIM ordering. */ - static const omp_clause_code ids[] = - {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ}; - int ix; - int level = -1; - - for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses)) - for (ix = GOMP_DIM_MAX + 1; ix--;) - if (OMP_CLAUSE_CODE (clauses) == ids[ix]) - { - if (level >= 0) - error_at (OMP_CLAUSE_LOCATION (clauses), - "multiple loop axes specified for routine"); - level = ix; - break; - } - - /* Default to SEQ. */ - if (level < 0) - level = GOMP_DIM_MAX; - - tree dims = NULL_TREE; - - for (ix = GOMP_DIM_MAX; ix--;) - dims = tree_cons (build_int_cst (boolean_type_node, ix >= level), - build_int_cst (integer_type_node, ix < level), dims); - - return dims; -} - -/* Retrieve the oacc function attrib and return it. Non-oacc - functions will return NULL. */ - -tree -get_oacc_fn_attrib (tree fn) -{ - return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn)); -} - -/* Return true if this oacc fn attrib is for a kernels offload - region. We use the TREE_PUBLIC flag of each dimension -- only - need to check the first one. */ - -bool -oacc_fn_attrib_kernels_p (tree attr) -{ - return TREE_PUBLIC (TREE_VALUE (attr)); -} - -/* Return level at which oacc routine may spawn a partitioned loop, or - -1 if it is not a routine (i.e. is an offload fn). */ - -static int -oacc_fn_attrib_level (tree attr) -{ - tree pos = TREE_VALUE (attr); - - if (!TREE_PURPOSE (pos)) - return -1; - - int ix = 0; - for (ix = 0; ix != GOMP_DIM_MAX; - ix++, pos = TREE_CHAIN (pos)) - if (!integer_zerop (TREE_PURPOSE (pos))) - break; - - return ix; -} - -/* Extract an oacc execution dimension from FN. FN must be an - offloaded function or routine that has already had its execution - dimensions lowered to the target-specific values. */ - -int -get_oacc_fn_dim_size (tree fn, int axis) -{ - tree attrs = get_oacc_fn_attrib (fn); - - gcc_assert (axis < GOMP_DIM_MAX); - - tree dims = TREE_VALUE (attrs); - while (axis--) - dims = TREE_CHAIN (dims); - - int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); - - return size; -} - -/* Extract the dimension axis from an IFN_GOACC_DIM_POS or - IFN_GOACC_DIM_SIZE call. */ - -int -get_oacc_ifn_dim_arg (const gimple *stmt) -{ - gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE - || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS); - tree arg = gimple_call_arg (stmt, 0); - HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg); - - gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX); - return (int) axis; -} - -/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending - at REGION_EXIT. */ - -static void -mark_loops_in_oacc_kernels_region (basic_block region_entry, - basic_block region_exit) -{ - struct loop *outer = region_entry->loop_father; - gcc_assert (region_exit == NULL || outer == region_exit->loop_father); - - /* Don't parallelize the kernels region if it contains more than one outer - loop. */ - unsigned int nr_outer_loops = 0; - struct loop *single_outer = NULL; - for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next) - { - gcc_assert (loop_outer (loop) == outer); - - if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry)) - continue; - - if (region_exit != NULL - && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit)) - continue; - - nr_outer_loops++; - single_outer = loop; - } - if (nr_outer_loops != 1) - return; - - for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner) - if (loop->next) - return; - - /* Mark the loops in the region. */ - for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner) - loop->in_oacc_kernels_region = true; -} - -/* Types used to pass grid and wortkgroup sizes to kernel invocation. */ - -struct GTY(()) grid_launch_attributes_trees -{ - tree kernel_dim_array_type; - tree kernel_lattrs_dimnum_decl; - tree kernel_lattrs_grid_decl; - tree kernel_lattrs_group_decl; - tree kernel_launch_attributes_type; -}; - -static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees; - -/* Create types used to pass kernel launch attributes to target. */ - -static void -grid_create_kernel_launch_attr_types (void) -{ - if (grid_attr_trees) - return; - grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); - - tree dim_arr_index_type - = build_index_type (build_int_cst (integer_type_node, 2)); - grid_attr_trees->kernel_dim_array_type - = build_array_type (uint32_type_node, dim_arr_index_type); - - grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); - grid_attr_trees->kernel_lattrs_dimnum_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), - uint32_type_node); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; - - grid_attr_trees->kernel_lattrs_grid_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) - = grid_attr_trees->kernel_lattrs_dimnum_decl; - grid_attr_trees->kernel_lattrs_group_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) - = grid_attr_trees->kernel_lattrs_grid_decl; - finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, - "__gomp_kernel_launch_attributes", - grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); -} - -/* Insert before the current statement in GSI a store of VALUE to INDEX of - array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be - of type uint32_type_node. */ - -static void -grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, - tree fld_decl, int index, tree value) -{ - tree ref = build4 (ARRAY_REF, uint32_type_node, - build3 (COMPONENT_REF, - grid_attr_trees->kernel_dim_array_type, - range_var, fld_decl, NULL_TREE), - build_int_cst (integer_type_node, index), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); -} - -/* Return a tree representation of a pointer to a structure with grid and - work-group size information. Statements filling that information will be - inserted before GSI, TGT_STMT is the target statement which has the - necessary information in it. */ - -static tree -grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, - gomp_target *tgt_stmt) -{ - grid_create_kernel_launch_attr_types (); - tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, - "__kernel_launch_attrs"); - - unsigned max_dim = 0; - for (tree clause = gimple_omp_target_clauses (tgt_stmt); - clause; - clause = OMP_CLAUSE_CHAIN (clause)) - { - if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) - continue; - - unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); - max_dim = MAX (dim, max_dim); - - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_grid_decl, - dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_group_decl, - dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); - } - - tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, - grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); - gcc_checking_assert (max_dim <= 2); - tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); - gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), - GSI_SAME_STMT); - TREE_ADDRESSABLE (lattrs) = 1; - return build_fold_addr_expr (lattrs); -} - -/* Build target argument identifier from the DEVICE identifier, value - identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ - -static tree -get_target_argument_identifier_1 (int device, bool subseqent_param, int id) -{ - tree t = build_int_cst (integer_type_node, device); - if (subseqent_param) - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - build_int_cst (integer_type_node, - GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - build_int_cst (integer_type_node, id)); - return t; -} - -/* Like above but return it in type that can be directly stored as an element - of the argument array. */ - -static tree -get_target_argument_identifier (int device, bool subseqent_param, int id) -{ - tree t = get_target_argument_identifier_1 (device, subseqent_param, id); - return fold_convert (ptr_type_node, t); -} - -/* Return a target argument consisting of DEVICE identifier, value identifier - ID, and the actual VALUE. */ - -static tree -get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, - tree value) -{ - tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, - fold_convert (integer_type_node, value), - build_int_cst (unsigned_type_node, - GOMP_TARGET_ARG_VALUE_SHIFT)); - t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, - get_target_argument_identifier_1 (device, false, id)); - t = fold_convert (ptr_type_node, t); - return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); -} - -/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, - push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, - otherwise push an identifier (with DEVICE and ID) and the VALUE in two - arguments. */ - -static void -push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, - int id, tree value, vec <tree> *args) -{ - if (tree_fits_shwi_p (value) - && tree_to_shwi (value) > -(1 << 15) - && tree_to_shwi (value) < (1 << 15)) - args->quick_push (get_target_argument_value (gsi, device, id, value)); - else - { - args->quick_push (get_target_argument_identifier (device, true, id)); - value = fold_convert (ptr_type_node, value); - value = force_gimple_operand_gsi (gsi, value, true, NULL, true, - GSI_SAME_STMT); - args->quick_push (value); - } -} - -/* Create an array of arguments that is then passed to GOMP_target. */ - -static tree -get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) -{ - auto_vec <tree, 6> args; - tree clauses = gimple_omp_target_clauses (tgt_stmt); - tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); - if (c) - t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); - else - t = integer_minus_one_node; - push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, - GOMP_TARGET_ARG_NUM_TEAMS, t, &args); - - c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); - if (c) - t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); - else - t = integer_minus_one_node; - push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, - GOMP_TARGET_ARG_THREAD_LIMIT, t, - &args); - - /* Add HSA-specific grid sizes, if available. */ - if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - { - t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, - GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES); - args.quick_push (t); - args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); - } - - /* Produce more, perhaps device specific, arguments here. */ - - tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, - args.length () + 1), - ".omp_target_args"); - for (unsigned i = 0; i < args.length (); i++) - { - tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, - build_int_cst (integer_type_node, i), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), - GSI_SAME_STMT); - } - tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, - build_int_cst (integer_type_node, args.length ()), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), - GSI_SAME_STMT); - TREE_ADDRESSABLE (argarray) = 1; - return build_fold_addr_expr (argarray); -} - -/* Expand the GIMPLE_OMP_TARGET starting at REGION. */ - -static void -expand_omp_target (struct omp_region *region) -{ - basic_block entry_bb, exit_bb, new_bb; - struct function *child_cfun; - tree child_fn, block, t; - gimple_stmt_iterator gsi; - gomp_target *entry_stmt; - gimple *stmt; - edge e; - bool offloaded, data_region; - - entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); - new_bb = region->entry; - - offloaded = is_gimple_omp_offloaded (entry_stmt); - switch (gimple_omp_target_kind (entry_stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - data_region = false; - break; - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - data_region = true; - break; - default: - gcc_unreachable (); - } - - child_fn = NULL_TREE; - child_cfun = NULL; - if (offloaded) - { - child_fn = gimple_omp_target_child_fn (entry_stmt); - child_cfun = DECL_STRUCT_FUNCTION (child_fn); - } - - /* Supported by expand_omp_taskreg, but not here. */ - if (child_cfun != NULL) - gcc_checking_assert (!child_cfun->cfg); - gcc_checking_assert (!gimple_in_ssa_p (cfun)); - - entry_bb = region->entry; - exit_bb = region->exit; - - if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS) - mark_loops_in_oacc_kernels_region (region->entry, region->exit); - - if (offloaded) - { - unsigned srcidx, dstidx, num; - - /* If the offloading region needs data sent from the parent - function, then the very first statement (except possible - tree profile counter updates) of the offloading body - is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since - &.OMP_DATA_O is passed as an argument to the child function, - we need to replace it with the argument as seen by the child - function. - - In most cases, this will end up being the identity assignment - .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had - a function call that has been inlined, the original PARM_DECL - .OMP_DATA_I may have been converted into a different local - variable. In which case, we need to keep the assignment. */ - tree data_arg = gimple_omp_target_data_arg (entry_stmt); - if (data_arg) - { - basic_block entry_succ_bb = single_succ (entry_bb); - gimple_stmt_iterator gsi; - tree arg; - gimple *tgtcopy_stmt = NULL; - tree sender = TREE_VEC_ELT (data_arg, 0); - - for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) - { - gcc_assert (!gsi_end_p (gsi)); - stmt = gsi_stmt (gsi); - if (gimple_code (stmt) != GIMPLE_ASSIGN) - continue; - - if (gimple_num_ops (stmt) == 2) - { - tree arg = gimple_assign_rhs1 (stmt); - - /* We're ignoring the subcode because we're - effectively doing a STRIP_NOPS. */ - - if (TREE_CODE (arg) == ADDR_EXPR - && TREE_OPERAND (arg, 0) == sender) - { - tgtcopy_stmt = stmt; - break; - } - } - } - - gcc_assert (tgtcopy_stmt != NULL); - arg = DECL_ARGUMENTS (child_fn); - - gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg); - gsi_remove (&gsi, true); - } - - /* Declare local variables needed in CHILD_CFUN. */ - block = DECL_INITIAL (child_fn); - BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); - /* The gimplifier could record temporaries in the offloading block - rather than in containing function's local_decls chain, - which would mean cgraph missed finalizing them. Do it now. */ - for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) - if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) - varpool_node::finalize_decl (t); - DECL_SAVED_TREE (child_fn) = NULL; - /* We'll create a CFG for child_fn, so no gimple body is needed. */ - gimple_set_body (child_fn, NULL); - TREE_USED (block) = 1; - - /* Reset DECL_CONTEXT on function arguments. */ - for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) - DECL_CONTEXT (t) = child_fn; - - /* Split ENTRY_BB at GIMPLE_*, - so that it can be moved to the child function. */ - gsi = gsi_last_bb (entry_bb); - stmt = gsi_stmt (gsi); - gcc_assert (stmt - && gimple_code (stmt) == gimple_code (entry_stmt)); - e = split_block (entry_bb, stmt); - gsi_remove (&gsi, true); - entry_bb = e->dest; - single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; - - /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */ - if (exit_bb) - { - gsi = gsi_last_bb (exit_bb); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - } - - /* Move the offloading region into CHILD_CFUN. */ - - block = gimple_block (entry_stmt); - - new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); - if (exit_bb) - single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; - /* When the OMP expansion process cannot guarantee an up-to-date - loop tree arrange for the child function to fixup loops. */ - if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; - - /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */ - num = vec_safe_length (child_cfun->local_decls); - for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) - { - t = (*child_cfun->local_decls)[srcidx]; - if (DECL_CONTEXT (t) == cfun->decl) - continue; - if (srcidx != dstidx) - (*child_cfun->local_decls)[dstidx] = t; - dstidx++; - } - if (dstidx != num) - vec_safe_truncate (child_cfun->local_decls, dstidx); - - /* Inform the callgraph about the new function. */ - child_cfun->curr_properties = cfun->curr_properties; - child_cfun->has_simduid_loops |= cfun->has_simduid_loops; - child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; - cgraph_node *node = cgraph_node::get_create (child_fn); - node->parallelized_function = 1; - cgraph_node::add_new_function (child_fn, true); - - /* Add the new function to the offload table. */ - if (ENABLE_OFFLOADING) - vec_safe_push (offload_funcs, child_fn); - - bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) - && !DECL_ASSEMBLER_NAME_SET_P (child_fn); - - /* Fix the callgraph edges for child_cfun. Those for cfun will be - fixed in a following pass. */ - push_cfun (child_cfun); - if (need_asm) - assign_assembler_name_if_neeeded (child_fn); - cgraph_edge::rebuild_edges (); - - /* Some EH regions might become dead, see PR34608. If - pass_cleanup_cfg isn't the first pass to happen with the - new child, these dead EH edges might cause problems. - Clean them up now. */ - if (flag_exceptions) - { - basic_block bb; - bool changed = false; - - FOR_EACH_BB_FN (bb, cfun) - changed |= gimple_purge_dead_eh_edges (bb); - if (changed) - cleanup_tree_cfg (); - } - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - pop_cfun (); - - if (dump_file && !gimple_in_ssa_p (cfun)) - { - omp_any_child_fn_dumped = true; - dump_function_header (dump_file, child_fn, dump_flags); - dump_function_to_file (child_fn, dump_file, dump_flags); - } - } - - /* Emit a library call to launch the offloading region, or do data - transfers. */ - tree t1, t2, t3, t4, device, cond, depend, c, clauses; - enum built_in_function start_ix; - location_t clause_loc; - unsigned int flags_i = 0; - bool oacc_kernels_p = false; - - switch (gimple_omp_target_kind (entry_stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - start_ix = BUILT_IN_GOMP_TARGET; - break; - case GF_OMP_TARGET_KIND_DATA: - start_ix = BUILT_IN_GOMP_TARGET_DATA; - break; - case GF_OMP_TARGET_KIND_UPDATE: - start_ix = BUILT_IN_GOMP_TARGET_UPDATE; - break; - case GF_OMP_TARGET_KIND_ENTER_DATA: - start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_EXIT_DATA: - start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA; - flags_i |= GOMP_TARGET_FLAG_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_OACC_KERNELS: - oacc_kernels_p = true; - /* FALLTHROUGH */ - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - start_ix = BUILT_IN_GOACC_PARALLEL; - break; - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - start_ix = BUILT_IN_GOACC_DATA_START; - break; - case GF_OMP_TARGET_KIND_OACC_UPDATE: - start_ix = BUILT_IN_GOACC_UPDATE; - break; - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA; - break; - case GF_OMP_TARGET_KIND_OACC_DECLARE: - start_ix = BUILT_IN_GOACC_DECLARE; - break; - default: - gcc_unreachable (); - } - - clauses = gimple_omp_target_clauses (entry_stmt); - - /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime - library choose) and there is no conditional. */ - cond = NULL_TREE; - device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV); - - c = find_omp_clause (clauses, OMP_CLAUSE_IF); - if (c) - cond = OMP_CLAUSE_IF_EXPR (c); - - c = find_omp_clause (clauses, OMP_CLAUSE_DEVICE); - if (c) - { - /* Even if we pass it to all library function calls, it is currently only - defined/used for the OpenMP target ones. */ - gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET - || start_ix == BUILT_IN_GOMP_TARGET_DATA - || start_ix == BUILT_IN_GOMP_TARGET_UPDATE - || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA); - - device = OMP_CLAUSE_DEVICE_ID (c); - clause_loc = OMP_CLAUSE_LOCATION (c); - } - else - clause_loc = gimple_location (entry_stmt); - - c = find_omp_clause (clauses, OMP_CLAUSE_NOWAIT); - if (c) - flags_i |= GOMP_TARGET_FLAG_NOWAIT; - - /* Ensure 'device' is of the correct type. */ - device = fold_convert_loc (clause_loc, integer_type_node, device); - - /* If we found the clause 'if (cond)', build - (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */ - if (cond) - { - cond = gimple_boolify (cond); - - basic_block cond_bb, then_bb, else_bb; - edge e; - tree tmp_var; - - tmp_var = create_tmp_var (TREE_TYPE (device)); - if (offloaded) - e = split_block_after_labels (new_bb); - else - { - gsi = gsi_last_bb (new_bb); - gsi_prev (&gsi); - e = split_block (new_bb, gsi_stmt (gsi)); - } - cond_bb = e->src; - new_bb = e->dest; - remove_edge (e); - - then_bb = create_empty_bb (cond_bb); - else_bb = create_empty_bb (then_bb); - set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); - set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); - - stmt = gimple_build_cond_empty (cond); - gsi = gsi_last_bb (cond_bb); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (then_bb); - stmt = gimple_build_assign (tmp_var, device); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - gsi = gsi_start_bb (else_bb); - stmt = gimple_build_assign (tmp_var, - build_int_cst (integer_type_node, - GOMP_DEVICE_HOST_FALLBACK)); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); - make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); - add_bb_to_loop (then_bb, cond_bb->loop_father); - add_bb_to_loop (else_bb, cond_bb->loop_father); - make_edge (then_bb, new_bb, EDGE_FALLTHRU); - make_edge (else_bb, new_bb, EDGE_FALLTHRU); - - device = tmp_var; - gsi = gsi_last_bb (new_bb); - } - else - { - gsi = gsi_last_bb (new_bb); - device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE, - true, GSI_SAME_STMT); - } - - t = gimple_omp_target_data_arg (entry_stmt); - if (t == NULL) - { - t1 = size_zero_node; - t2 = build_zero_cst (ptr_type_node); - t3 = t2; - t4 = t2; - } - else - { - t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1)))); - t1 = size_binop (PLUS_EXPR, t1, size_int (1)); - t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0)); - t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1)); - t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2)); - } - - gimple *g; - bool tagging = false; - /* The maximum number used by any start_ix, without varargs. */ - auto_vec<tree, 11> args; - args.quick_push (device); - if (offloaded) - args.quick_push (build_fold_addr_expr (child_fn)); - args.quick_push (t1); - args.quick_push (t2); - args.quick_push (t3); - args.quick_push (t4); - switch (start_ix) - { - case BUILT_IN_GOACC_DATA_START: - case BUILT_IN_GOACC_DECLARE: - case BUILT_IN_GOMP_TARGET_DATA: - break; - case BUILT_IN_GOMP_TARGET: - case BUILT_IN_GOMP_TARGET_UPDATE: - case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA: - args.quick_push (build_int_cst (unsigned_type_node, flags_i)); - c = find_omp_clause (clauses, OMP_CLAUSE_DEPEND); - if (c) - depend = OMP_CLAUSE_DECL (c); - else - depend = build_int_cst (ptr_type_node, 0); - args.quick_push (depend); - if (start_ix == BUILT_IN_GOMP_TARGET) - args.quick_push (get_target_arguments (&gsi, entry_stmt)); - break; - case BUILT_IN_GOACC_PARALLEL: - { - set_oacc_fn_attrib (child_fn, clauses, oacc_kernels_p, &args); - tagging = true; - } - /* FALLTHRU */ - case BUILT_IN_GOACC_ENTER_EXIT_DATA: - case BUILT_IN_GOACC_UPDATE: - { - tree t_async = NULL_TREE; - - /* If present, use the value specified by the respective - clause, making sure that is of the correct type. */ - c = find_omp_clause (clauses, OMP_CLAUSE_ASYNC); - if (c) - t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c), - integer_type_node, - OMP_CLAUSE_ASYNC_EXPR (c)); - else if (!tagging) - /* Default values for t_async. */ - t_async = fold_convert_loc (gimple_location (entry_stmt), - integer_type_node, - build_int_cst (integer_type_node, - GOMP_ASYNC_SYNC)); - if (tagging && t_async) - { - unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX; - - if (TREE_CODE (t_async) == INTEGER_CST) - { - /* See if we can pack the async arg in to the tag's - operand. */ - i_async = TREE_INT_CST_LOW (t_async); - if (i_async < GOMP_LAUNCH_OP_MAX) - t_async = NULL_TREE; - else - i_async = GOMP_LAUNCH_OP_MAX; - } - args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, - i_async)); - } - if (t_async) - args.safe_push (t_async); - - /* Save the argument index, and ... */ - unsigned t_wait_idx = args.length (); - unsigned num_waits = 0; - c = find_omp_clause (clauses, OMP_CLAUSE_WAIT); - if (!tagging || c) - /* ... push a placeholder. */ - args.safe_push (integer_zero_node); - - for (; c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT) - { - args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c), - integer_type_node, - OMP_CLAUSE_WAIT_EXPR (c))); - num_waits++; - } - - if (!tagging || num_waits) - { - tree len; - - /* Now that we know the number, update the placeholder. */ - if (tagging) - len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits); - else - len = build_int_cst (integer_type_node, num_waits); - len = fold_convert_loc (gimple_location (entry_stmt), - unsigned_type_node, len); - args[t_wait_idx] = len; - } - } - break; - default: - gcc_unreachable (); - } - if (tagging) - /* Push terminal marker - zero. */ - args.safe_push (oacc_launch_pack (0, NULL_TREE, 0)); - - g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args); - gimple_set_location (g, gimple_location (entry_stmt)); - gsi_insert_before (&gsi, g, GSI_SAME_STMT); - if (!offloaded) - { - g = gsi_stmt (gsi); - gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); - gsi_remove (&gsi, true); - } - if (data_region && region->exit) - { - gsi = gsi_last_bb (region->exit); - g = gsi_stmt (gsi); - gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN); - gsi_remove (&gsi, true); - } -} - -/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with - iteration variable derived from the thread number. INTRA_GROUP means this - is an expansion of a loop iterating over work-items within a separate - iteration over groups. */ - -static void -grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) -{ - gimple_stmt_iterator gsi; - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_checking_assert (gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_GRID_LOOP); - size_t collapse = gimple_omp_for_collapse (for_stmt); - struct omp_for_data_loop *loops - = XALLOCAVEC (struct omp_for_data_loop, - gimple_omp_for_collapse (for_stmt)); - struct omp_for_data fd; - - remove_edge (BRANCH_EDGE (kfor->entry)); - basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; - - gcc_assert (kfor->cont); - extract_omp_for_data (for_stmt, &fd, loops); - - gsi = gsi_start_bb (body_bb); - - for (size_t dim = 0; dim < collapse; dim++) - { - tree type, itype; - itype = type = TREE_TYPE (fd.loops[dim].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - tree n1 = fd.loops[dim].n1; - tree step = fd.loops[dim].step; - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - tree threadid; - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - gcc_checking_assert (!intra_group); - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKGROUPID), 1, - build_int_cstu (unsigned_type_node, dim)); - } - else if (intra_group) - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMID), 1, - build_int_cstu (unsigned_type_node, dim)); - else - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMABSID), 1, - build_int_cstu (unsigned_type_node, dim)); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - tree startvar = fd.loops[dim].v; - tree t = fold_build2 (MULT_EXPR, itype, threadid, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (type, t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, true, GSI_SAME_STMT); - gassign *assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - } - /* Remove the omp for statement */ - gsi = gsi_last_bb (kfor->entry); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi = gsi_last_bb (kfor->cont); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); - gsi_remove (&gsi, true); - - /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ - gsi = gsi_last_bb (kfor->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - if (intra_group) - gsi_insert_before (&gsi, build_omp_barrier (NULL_TREE), GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Fixup the much simpler CFG. */ - remove_edge (find_edge (kfor->cont, body_bb)); - - if (kfor->cont != body_bb) - set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); - set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); -} - -/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap - argument_decls. */ - -struct grid_arg_decl_map -{ - tree old_arg; - tree new_arg; -}; - -/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones - pertaining to kernel function. */ - -static tree -grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) -{ - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; - tree t = *tp; - - if (t == adm->old_arg) - *tp = adm->new_arg; - *walk_subtrees = !TYPE_P (t) && !DECL_P (t); - return NULL_TREE; -} - -static void expand_omp (struct omp_region *region); - -/* If TARGET region contains a kernel body for loop, remove its region from the - TARGET and expand it in HSA gridified kernel fashion. */ - -static void -grid_expand_target_grid_body (struct omp_region *target) -{ - if (!hsa_gen_requested_p ()) - return; - - gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); - struct omp_region **pp; - - for (pp = &target->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_GRID_BODY) - break; - - struct omp_region *gpukernel = *pp; - - tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); - if (!gpukernel) - { - /* HSA cannot handle OACC stuff. */ - if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) - return; - gcc_checking_assert (orig_child_fndecl); - gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - cgraph_node *n = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (n); - return; - } - - gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry))); - *pp = gpukernel->next; - for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_FOR) - break; - - struct omp_region *kfor = *pp; - gcc_assert (kfor); - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); - *pp = kfor->next; - if (kfor->inner) - { - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - struct omp_region **next_pp; - for (pp = &kfor->inner; *pp; pp = next_pp) - { - next_pp = &(*pp)->next; - if ((*pp)->type != GIMPLE_OMP_FOR) - continue; - gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); - gcc_assert (gimple_omp_for_kind (inner) - == GF_OMP_FOR_KIND_GRID_LOOP); - grid_expand_omp_for_loop (*pp, true); - *pp = (*pp)->next; - next_pp = pp; - } - } - expand_omp (kfor->inner); - } - if (gpukernel->inner) - expand_omp (gpukernel->inner); - - tree kern_fndecl = copy_node (orig_child_fndecl); - DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); - SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); - tree tgtblock = gimple_block (tgt_stmt); - tree fniniblock = make_node (BLOCK); - BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock; - BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); - BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); - BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; - DECL_INITIAL (kern_fndecl) = fniniblock; - push_struct_function (kern_fndecl); - cfun->function_end_locus = gimple_location (tgt_stmt); - init_tree_ssa (cfun); - pop_cfun (); - - tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); - gcc_assert (!DECL_CHAIN (old_parm_decl)); - tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); - DECL_CONTEXT (new_parm_decl) = kern_fndecl; - DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; - gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); - DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); - DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; - struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); - kern_cfun->curr_properties = cfun->curr_properties; - - grid_expand_omp_for_loop (kfor, false); - - /* Remove the omp for statement */ - gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry); - gsi_remove (&gsi, true); - /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real - return. */ - gsi = gsi_last_bb (gpukernel->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gimple *ret_stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Statements in the first BB in the target construct have been produced by - target lowering and must be copied inside the GPUKERNEL, with the two - exceptions of the first OMP statement and the OMP_DATA assignment - statement. */ - gsi = gsi_start_bb (single_succ (gpukernel->entry)); - tree data_arg = gimple_omp_target_data_arg (tgt_stmt); - tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL; - for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); - !gsi_end_p (tsi); gsi_next (&tsi)) - { - gimple *stmt = gsi_stmt (tsi); - if (is_gimple_omp (stmt)) - break; - if (sender - && is_gimple_assign (stmt) - && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR - && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) - continue; - gimple *copy = gimple_copy (stmt); - gsi_insert_before (&gsi, copy, GSI_SAME_STMT); - gimple_set_block (copy, fniniblock); - } - - move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), - gpukernel->exit, inside_block); - - cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); - kcn->mark_force_output (); - cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (kcn, orig_child); - - cgraph_node::add_new_function (kern_fndecl, true); - push_cfun (kern_cfun); - cgraph_edge::rebuild_edges (); - - /* Re-map any mention of the PARM_DECL of the original function to the - PARM_DECL of the new one. - - TODO: It would be great if lowering produced references into the GPU - kernel decl straight away and we did not have to do this. */ - struct grid_arg_decl_map adm; - adm.old_arg = old_parm_decl; - adm.new_arg = new_parm_decl; - basic_block bb; - FOR_EACH_BB_FN (bb, kern_cfun) - { - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = &adm; - walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); - } - } - pop_cfun (); - - return; -} - -/* Expand the parallel region tree rooted at REGION. Expansion - proceeds in depth-first order. Innermost regions are expanded - first. This way, parallel regions that require a new function to - be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any - internal dependencies in their body. */ - -static void -expand_omp (struct omp_region *region) -{ - omp_any_child_fn_dumped = false; - while (region) - { - location_t saved_location; - gimple *inner_stmt = NULL; - - /* First, determine whether this is a combined parallel+workshare - region. */ - if (region->type == GIMPLE_OMP_PARALLEL) - determine_parallel_type (region); - else if (region->type == GIMPLE_OMP_TARGET) - grid_expand_target_grid_body (region); - - if (region->type == GIMPLE_OMP_FOR - && gimple_omp_for_combined_p (last_stmt (region->entry))) - inner_stmt = last_stmt (region->inner->entry); - - if (region->inner) - expand_omp (region->inner); - - saved_location = input_location; - if (gimple_has_location (last_stmt (region->entry))) - input_location = gimple_location (last_stmt (region->entry)); - - switch (region->type) - { - case GIMPLE_OMP_PARALLEL: - case GIMPLE_OMP_TASK: - expand_omp_taskreg (region); - break; - - case GIMPLE_OMP_FOR: - expand_omp_for (region, inner_stmt); - break; - - case GIMPLE_OMP_SECTIONS: - expand_omp_sections (region); - break; - - case GIMPLE_OMP_SECTION: - /* Individual omp sections are handled together with their - parent GIMPLE_OMP_SECTIONS region. */ - break; - - case GIMPLE_OMP_SINGLE: - expand_omp_single (region); - break; - - case GIMPLE_OMP_ORDERED: - { - gomp_ordered *ord_stmt - = as_a <gomp_ordered *> (last_stmt (region->entry)); - if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), - OMP_CLAUSE_DEPEND)) - { - /* We'll expand these when expanding corresponding - worksharing region with ordered(n) clause. */ - gcc_assert (region->outer - && region->outer->type == GIMPLE_OMP_FOR); - region->ord_stmt = ord_stmt; - break; - } - } - /* FALLTHRU */ - case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_TEAMS: - expand_omp_synch (region); - break; - - case GIMPLE_OMP_ATOMIC_LOAD: - expand_omp_atomic (region); - break; - - case GIMPLE_OMP_TARGET: - expand_omp_target (region); - break; - - default: - gcc_unreachable (); - } - - input_location = saved_location; - region = region->next; - } - if (omp_any_child_fn_dumped) - { - if (dump_file) - dump_function_header (dump_file, current_function_decl, dump_flags); - omp_any_child_fn_dumped = false; - } -} - -/* Helper for build_omp_regions. Scan the dominator tree starting at - block BB. PARENT is the region that contains BB. If SINGLE_TREE is - true, the function ends once a single tree is built (otherwise, whole - forest of OMP constructs may be built). */ - -static void -build_omp_regions_1 (basic_block bb, struct omp_region *parent, - bool single_tree) -{ - gimple_stmt_iterator gsi; - gimple *stmt; - basic_block son; - - gsi = gsi_last_bb (bb); - if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi))) - { - struct omp_region *region; - enum gimple_code code; - - stmt = gsi_stmt (gsi); - code = gimple_code (stmt); - if (code == GIMPLE_OMP_RETURN) - { - /* STMT is the return point out of region PARENT. Mark it - as the exit point and make PARENT the immediately - enclosing region. */ - gcc_assert (parent); - region = parent; - region->exit = bb; - parent = parent->outer; - } - else if (code == GIMPLE_OMP_ATOMIC_STORE) - { - /* GIMPLE_OMP_ATOMIC_STORE is analoguous to - GIMPLE_OMP_RETURN, but matches with - GIMPLE_OMP_ATOMIC_LOAD. */ - gcc_assert (parent); - gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD); - region = parent; - region->exit = bb; - parent = parent->outer; - } - else if (code == GIMPLE_OMP_CONTINUE) - { - gcc_assert (parent); - parent->cont = bb; - } - else if (code == GIMPLE_OMP_SECTIONS_SWITCH) - { - /* GIMPLE_OMP_SECTIONS_SWITCH is part of - GIMPLE_OMP_SECTIONS, and we do nothing for it. */ - } - else - { - region = new_omp_region (bb, code, parent); - /* Otherwise... */ - if (code == GIMPLE_OMP_TARGET) - { - switch (gimple_omp_target_kind (stmt)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - break; - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - /* ..., other than for those stand-alone directives... */ - region = NULL; - break; - default: - gcc_unreachable (); - } - } - else if (code == GIMPLE_OMP_ORDERED - && find_omp_clause (gimple_omp_ordered_clauses - (as_a <gomp_ordered *> (stmt)), - OMP_CLAUSE_DEPEND)) - /* #pragma omp ordered depend is also just a stand-alone - directive. */ - region = NULL; - /* ..., this directive becomes the parent for a new region. */ - if (region) - parent = region; - } - } - - if (single_tree && !parent) - return; - - for (son = first_dom_son (CDI_DOMINATORS, bb); - son; - son = next_dom_son (CDI_DOMINATORS, son)) - build_omp_regions_1 (son, parent, single_tree); -} - -/* Builds the tree of OMP regions rooted at ROOT, storing it to - root_omp_region. */ - -static void -build_omp_regions_root (basic_block root) -{ - gcc_assert (root_omp_region == NULL); - build_omp_regions_1 (root, NULL, true); - gcc_assert (root_omp_region != NULL); -} - -/* Expands omp construct (and its subconstructs) starting in HEAD. */ - -void -omp_expand_local (basic_block head) -{ - build_omp_regions_root (head); - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "\nOMP region tree\n\n"); - dump_omp_region (dump_file, root_omp_region, 0); - fprintf (dump_file, "\n"); - } - - remove_exit_barriers (root_omp_region); - expand_omp (root_omp_region); - - free_omp_regions (); -} - -/* Scan the CFG and build a tree of OMP regions. Return the root of - the OMP region tree. */ - -static void -build_omp_regions (void) -{ - gcc_assert (root_omp_region == NULL); - calculate_dominance_info (CDI_DOMINATORS); - build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false); -} - -/* Main entry point for expanding OMP-GIMPLE into runtime calls. */ - -static unsigned int -execute_expand_omp (void) -{ - build_omp_regions (); - - if (!root_omp_region) - return 0; - - if (dump_file) - { - fprintf (dump_file, "\nOMP region tree\n\n"); - dump_omp_region (dump_file, root_omp_region, 0); - fprintf (dump_file, "\n"); - } - - remove_exit_barriers (root_omp_region); - - expand_omp (root_omp_region); - - if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) - verify_loop_structure (); - cleanup_tree_cfg (); - - free_omp_regions (); - - return 0; -} - -/* OMP expansion -- the default pass, run before creation of SSA form. */ - -namespace { - -const pass_data pass_data_expand_omp = -{ - GIMPLE_PASS, /* type */ - "ompexp", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_gimple_any, /* properties_required */ - PROP_gimple_eomp, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_expand_omp : public gimple_opt_pass -{ -public: - pass_expand_omp (gcc::context *ctxt) - : gimple_opt_pass (pass_data_expand_omp, ctxt) - {} - - /* opt_pass methods: */ - virtual unsigned int execute (function *) - { - bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0 - || flag_openmp_simd != 0) - && !seen_error ()); - - /* This pass always runs, to provide PROP_gimple_eomp. - But often, there is nothing to do. */ - if (!gate) - return 0; - - return execute_expand_omp (); - } - -}; // class pass_expand_omp - -} // anon namespace - -gimple_opt_pass * -make_pass_expand_omp (gcc::context *ctxt) -{ - return new pass_expand_omp (ctxt); -} - -namespace { - -const pass_data pass_data_expand_omp_ssa = -{ - GIMPLE_PASS, /* type */ - "ompexpssa", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ - PROP_gimple_eomp, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */ -}; - -class pass_expand_omp_ssa : public gimple_opt_pass -{ -public: - pass_expand_omp_ssa (gcc::context *ctxt) - : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *fun) - { - return !(fun->curr_properties & PROP_gimple_eomp); - } - virtual unsigned int execute (function *) { return execute_expand_omp (); } - opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); } - -}; // class pass_expand_omp_ssa - -} // anon namespace - -gimple_opt_pass * -make_pass_expand_omp_ssa (gcc::context *ctxt) -{ - return new pass_expand_omp_ssa (ctxt); -} /* Routines to lower OMP directives into OMP-GIMPLE. */ @@ -14606,7 +5860,7 @@ lower_omp_sections (gimple_stmt_iterator *gsi_p, omp_context *ctx) new_body = maybe_catch_exception (new_body); t = gimple_build_omp_return - (!!find_omp_clause (gimple_omp_sections_clauses (stmt), + (!!omp_find_clause (gimple_omp_sections_clauses (stmt), OMP_CLAUSE_NOWAIT)); gimple_seq_add_stmt (&new_body, t); maybe_add_implicit_barrier_cancel (ctx, &new_body); @@ -14769,7 +6023,7 @@ lower_omp_single (gimple_stmt_iterator *gsi_p, omp_context *ctx) bind_body = maybe_catch_exception (bind_body); t = gimple_build_omp_return - (!!find_omp_clause (gimple_omp_single_clauses (single_stmt), + (!!omp_find_clause (gimple_omp_single_clauses (single_stmt), OMP_CLAUSE_NOWAIT)); gimple_seq_add_stmt (&bind_body_tail, t); maybe_add_implicit_barrier_cancel (ctx, &bind_body_tail); @@ -14876,7 +6130,7 @@ lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt, unsigned int len = gimple_omp_for_collapse (ctx->outer->stmt); struct omp_for_data_loop *loops = XALLOCAVEC (struct omp_for_data_loop, len); - extract_omp_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops); + omp_extract_for_data (as_a <gomp_for *> (ctx->outer->stmt), &fd, loops); if (!fd.ordered) return; @@ -14980,7 +6234,7 @@ lower_omp_ordered_clauses (gimple_stmt_iterator *gsi_p, gomp_ordered *ord_stmt, { gcc_assert (i < len); - /* extract_omp_for_data has canonicalized the condition. */ + /* omp_extract_for_data has canonicalized the condition. */ gcc_assert (fd.loops[i].cond_code == LT_EXPR || fd.loops[i].cond_code == GT_EXPR); bool forward = fd.loops[i].cond_code == LT_EXPR; @@ -15111,16 +6365,16 @@ lower_omp_ordered (gimple_stmt_iterator *gsi_p, omp_context *ctx) gomp_ordered *ord_stmt = as_a <gomp_ordered *> (stmt); gcall *x; gbind *bind; - bool simd = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + bool simd = omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_SIMD); /* FIXME: this should check presence of OMP_CLAUSE__SIMT_ on the enclosing loop. */ bool maybe_simt = simd && omp_maybe_offloaded_ctx (ctx) && omp_max_simt_vf () > 1; - bool threads = find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + bool threads = omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_THREADS); - if (find_omp_clause (gimple_omp_ordered_clauses (ord_stmt), + if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt), OMP_CLAUSE_DEPEND)) { /* FIXME: This is needs to be moved to the expansion to verify various @@ -15322,47 +6576,6 @@ lower_omp_critical (gimple_stmt_iterator *gsi_p, omp_context *ctx) BLOCK_VARS (block) = gimple_bind_vars (bind); } -/* Return the lastprivate predicate for a given gridified loop described by FD). - TODO: When grid stuff is moved to a separate file, move this too. */ - -static tree -grid_lastprivate_predicate (struct omp_for_data *fd) -{ - /* When dealing with a gridified loop, we need to check up to three collapsed - iteration variables but they are not actually captured in this fd. - Fortunately, we can easily rely on HSA builtins to get this - information. */ - - tree id, size; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP - && gimple_omp_for_grid_intra_group (fd->for_stmt)) - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); - size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); - } - else - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); - size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); - } - tree cond = NULL; - for (int dim = 0; dim < fd->collapse; dim++) - { - tree dim_tree = build_int_cstu (unsigned_type_node, dim); - tree u1 = build_int_cstu (unsigned_type_node, 1); - tree c2 - = build2 (EQ_EXPR, boolean_type_node, - build2 (PLUS_EXPR, unsigned_type_node, - build_call_expr (id, 1, dim_tree), u1), - build_call_expr (size, 1, dim_tree)); - if (cond) - cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); - else - cond = c2; - } - return cond; -} - /* A subroutine of lower_omp_for. Generate code to emit the predicate for a lastprivate clause. Given a loop control predicate of (V cond N2), we gate the clause on (!(V cond N2)). The lowered form @@ -15391,7 +6604,7 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP || gimple_omp_for_grid_phony (fd->for_stmt)) - cond = grid_lastprivate_predicate (fd); + cond = omp_grid_lastprivate_predicate (fd); else { tree n2 = fd->loop.n2; @@ -15415,7 +6628,7 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, else { struct omp_for_data outer_fd; - extract_omp_for_data (gfor, &outer_fd, NULL); + omp_extract_for_data (gfor, &outer_fd, NULL); n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); } } @@ -15429,16 +6642,16 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, int i; tree taskreg_clauses = gimple_omp_taskreg_clauses (taskreg_ctx->stmt); - tree innerc = find_omp_clause (taskreg_clauses, + tree innerc = omp_find_clause (taskreg_clauses, OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); for (i = 0; i < fd->collapse; i++) { - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); gcc_assert (innerc); } - innerc = find_omp_clause (OMP_CLAUSE_CHAIN (innerc), + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); if (innerc) n2 = fold_convert (TREE_TYPE (n2), @@ -15516,7 +6729,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (gimple_omp_for_combined_into_p (stmt)) { - extract_omp_for_data (stmt, &fd, NULL); + omp_extract_for_data (stmt, &fd, NULL); fdp = &fd; /* We need two temporaries with fd.loop.v type (istart/iend) @@ -15534,7 +6747,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) tree clauses = *pc; if (taskreg_for) outerc - = find_omp_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt), + = omp_find_clause (gimple_omp_taskreg_clauses (ctx->outer->stmt), OMP_CLAUSE__LOOPTEMP_); for (i = 0; i < count; i++) { @@ -15543,7 +6756,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) { gcc_assert (outerc); temp = lookup_decl (OMP_CLAUSE_DECL (outerc), ctx->outer); - outerc = find_omp_clause (OMP_CLAUSE_CHAIN (outerc), + outerc = omp_find_clause (OMP_CLAUSE_CHAIN (outerc), OMP_CLAUSE__LOOPTEMP_); } else @@ -15590,7 +6803,7 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) } /* Once lowered, extract the bounds and clauses. */ - extract_omp_for_data (stmt, &fd, NULL); + omp_extract_for_data (stmt, &fd, NULL); if (is_gimple_omp_oacc (ctx->stmt) && !ctx_in_oacc_kernels_region (ctx)) @@ -15879,7 +7092,7 @@ create_task_copyfn (gomp_task *task_stmt, omp_context *ctx) sf = *tcctx.cb.decl_map->get (sf); src = build_simple_mem_ref_loc (loc, sarg); src = omp_build_component_ref (src, sf); - if (use_pointer_for_field (decl, NULL) || is_reference (decl)) + if (use_pointer_for_field (decl, NULL) || omp_is_reference (decl)) src = build_simple_mem_ref_loc (loc, src); } else @@ -15977,7 +7190,7 @@ lower_depend_clauses (tree *pclauses, gimple_seq *iseq, gimple_seq *oseq) gimple *g; size_t n_in = 0, n_out = 0, idx = 2, i; - clauses = find_omp_clause (*pclauses, OMP_CLAUSE_DEPEND); + clauses = omp_find_clause (*pclauses, OMP_CLAUSE_DEPEND); gcc_assert (clauses); for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c)) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_DEPEND) @@ -16068,7 +7281,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dep_ilist = NULL; gimple_seq dep_olist = NULL; if (gimple_code (stmt) == GIMPLE_OMP_TASK - && find_omp_clause (clauses, OMP_CLAUSE_DEPEND)) + && omp_find_clause (clauses, OMP_CLAUSE_DEPEND)) { push_gimplify_context (); dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); @@ -16218,7 +7431,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dep_ilist = NULL; gimple_seq dep_olist = NULL; - if (find_omp_clause (clauses, OMP_CLAUSE_DEPEND)) + if (omp_find_clause (clauses, OMP_CLAUSE_DEPEND)) { push_gimplify_context (); dep_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); @@ -16348,7 +7561,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (is_reference (new_var)) + if (omp_is_reference (new_var)) { /* Create a local object to hold the instance value. */ @@ -16376,7 +7589,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) goto oacc_firstprivate; map_cnt++; var = OMP_CLAUSE_DECL (c); - if (!is_reference (var) + if (!omp_is_reference (var) && !is_gimple_reg_type (TREE_TYPE (var))) { tree new_var = lookup_decl (var, ctx); @@ -16582,7 +7795,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_FIRSTPRIVATE) { gcc_assert (is_gimple_omp_oacc (ctx->stmt)); - if (!is_reference (var)) + if (!omp_is_reference (var)) { if (is_gimple_reg (var) && OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) @@ -16732,7 +7945,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_oacc_parallel (ctx)) goto oacc_firstprivate_map; ovar = OMP_CLAUSE_DECL (c); - if (is_reference (ovar)) + if (omp_is_reference (ovar)) talign = TYPE_ALIGN_UNIT (TREE_TYPE (TREE_TYPE (ovar))); else talign = DECL_ALIGN_UNIT (ovar); @@ -16740,7 +7953,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) x = build_sender_ref (ovar, ctx); tkind = GOMP_MAP_FIRSTPRIVATE; type = TREE_TYPE (ovar); - if (is_reference (ovar)) + if (omp_is_reference (ovar)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) @@ -16748,7 +7961,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tkind = GOMP_MAP_FIRSTPRIVATE_INT; tree t = var; - if (is_reference (var)) + if (omp_is_reference (var)) t = build_simple_mem_ref (var); else if (OMP_CLAUSE_FIRSTPRIVATE_IMPLICIT (c)) TREE_NO_WARNING (var) = 1; @@ -16757,7 +7970,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) t = fold_convert (TREE_TYPE (x), t); gimplify_assign (x, t, &ilist); } - else if (is_reference (var)) + else if (omp_is_reference (var)) gimplify_assign (x, var, &ilist); else if (is_gimple_reg (var)) { @@ -16776,7 +7989,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } if (tkind == GOMP_MAP_FIRSTPRIVATE_INT) s = size_int (0); - else if (is_reference (ovar)) + else if (omp_is_reference (ovar)) s = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ovar))); else s = TYPE_SIZE_UNIT (TREE_TYPE (ovar)); @@ -16810,7 +8023,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) var = build_fold_addr_expr (var); else { - if (is_reference (ovar)) + if (omp_is_reference (ovar)) { type = TREE_TYPE (type); if (TREE_CODE (type) != ARRAY_TYPE) @@ -16889,13 +8102,13 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_gimple_omp_oacc (ctx->stmt)) break; var = OMP_CLAUSE_DECL (c); - if (is_reference (var) + if (omp_is_reference (var) || is_gimple_reg_type (TREE_TYPE (var))) { tree new_var = lookup_decl (var, ctx); tree type; type = TREE_TYPE (var); - if (is_reference (var)) + if (omp_is_reference (var)) type = TREE_TYPE (type); if ((INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) <= POINTER_SIZE) @@ -16907,7 +8120,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) x = fold_convert (type, x); gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue); - if (is_reference (var)) + if (omp_is_reference (var)) { tree v = create_tmp_var_raw (type, get_name (var)); gimple_add_tmp_var (v); @@ -16921,7 +8134,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) } else { - x = build_receiver_ref (var, !is_reference (var), ctx); + x = build_receiver_ref (var, !omp_is_reference (var), ctx); gimplify_expr (&x, &new_body, NULL, is_gimple_val, fb_rvalue); gimple_seq_add_stmt (&new_body, @@ -16945,7 +8158,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) if (is_gimple_omp_oacc (ctx->stmt)) break; var = OMP_CLAUSE_DECL (c); - if (is_reference (var)) + if (omp_is_reference (var)) { location_t clause_loc = OMP_CLAUSE_LOCATION (c); tree new_var = lookup_decl (var, ctx); @@ -17000,7 +8213,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) { tree type = TREE_TYPE (var); tree new_var = lookup_decl (var, ctx); - if (is_reference (var)) + if (omp_is_reference (var)) { type = TREE_TYPE (type); if (TREE_CODE (type) != ARRAY_TYPE) @@ -17089,7 +8302,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) offset)); } else - is_ref = is_reference (var); + is_ref = omp_is_reference (var); if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FIRSTPRIVATE_REFERENCE) is_ref = false; bool ref_to_array = false; @@ -17169,7 +8382,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_stmt (&new_body, gimple_build_assign (new_pvar, x)); } - else if (is_reference (var) && !is_gimple_omp_oacc (ctx->stmt)) + else if (omp_is_reference (var) && !is_gimple_omp_oacc (ctx->stmt)) { location_t clause_loc = OMP_CLAUSE_LOCATION (c); tree new_var = lookup_decl (var, ctx); @@ -17251,7 +8464,7 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq dlist = NULL; gimple_seq olist = NULL; - tree num_teams = find_omp_clause (gimple_omp_teams_clauses (teams_stmt), + tree num_teams = omp_find_clause (gimple_omp_teams_clauses (teams_stmt), OMP_CLAUSE_NUM_TEAMS); if (num_teams == NULL_TREE) num_teams = build_int_cst (unsigned_type_node, 0); @@ -17261,7 +8474,7 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) num_teams = fold_convert (unsigned_type_node, num_teams); gimplify_expr (&num_teams, &bind_body, NULL, is_gimple_val, fb_rvalue); } - tree thread_limit = find_omp_clause (gimple_omp_teams_clauses (teams_stmt), + tree thread_limit = omp_find_clause (gimple_omp_teams_clauses (teams_stmt), OMP_CLAUSE_THREAD_LIMIT); if (thread_limit == NULL_TREE) thread_limit = build_int_cst (unsigned_type_node, 0); @@ -17620,1333 +8833,6 @@ lower_omp (gimple_seq *body, omp_context *ctx) input_location = saved_location; } -/* Structure describing the basic properties of the loop we ara analyzing - whether it can be gridified and when it is gridified. */ - -struct grid_prop -{ - /* True when we are doing tiling gridification, i.e. when there is a distinct - distribute loop over groups and a loop construct over work-items. False - when distribute and parallel for loops form a combined construct. */ - bool tiling; - /* Location of the target construct for optimization information - messages. */ - location_t target_loc; - /* The collapse clause of the involved loops. Collapse value of all of them - must be the same for gridification to take place. */ - size_t collapse; - /* Group sizes, if requested by the user or NULL if not requested. */ - tree group_sizes[3]; -}; - -#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ - "gridified HSA kernel because " - -/* Return true if STMT is an assignment of a register-type into a local - VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to - any of the trees specifying group sizes there. */ - -static bool -grid_safe_assignment_p (gimple *stmt, grid_prop *grid) -{ - gassign *assign = dyn_cast <gassign *> (stmt); - if (!assign) - return false; - if (gimple_clobber_p (assign)) - return true; - tree lhs = gimple_assign_lhs (assign); - if (!VAR_P (lhs) - || !is_gimple_reg_type (TREE_TYPE (lhs)) - || is_global_var (lhs)) - return false; - if (grid) - for (unsigned i = 0; i < grid->collapse; i++) - if (lhs == grid->group_sizes[i]) - return false; - return true; -} - -/* Return true if all statements in SEQ are assignments to local register-type - variables that do not hold group size information. */ - -static bool -grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) -{ - if (!seq) - return true; - - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) - return false; - return true; -} - -/* Scan statements in SEQ and call itself recursively on any bind. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. If during whole search only assignments to - register-type local variables (that do not overwrite group size information) - and one single OMP statement is encountered, return true, otherwise return - false. RET is where we store any OMP statement encountered. */ - -static bool -grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, - const char *name, gimple **ret) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid)) - continue; - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind), - grid, name, ret)) - return false; - } - else if (is_gimple_omp (stmt)) - { - if (*ret) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct " - "contains multiple OpenMP constructs\n", - name); - dump_printf_loc (MSG_NOTE, gimple_location (*ret), - "The first OpenMP construct within " - "a parallel\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "The second OpenMP construct within " - "a parallel\n"); - } - return false; - } - *ret = stmt; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct contains " - "a complex statement\n", name); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be analyzed for " - "gridification\n"); - } - return false; - } - } - return true; -} - -/* Scan statements in SEQ and make sure that it and any binds in it contain - only assignments to local register-type variables (that do not overwrite - group size information) and one OMP construct. If so, return that - construct, otherwise return NULL. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. If - dumping is enabled and function fails, use NAME to dump a note with the - reason for failure. */ - -static gimple * -grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, - const char *name) -{ - if (!seq) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", - name); - return NULL; - } - - gimple *ret = NULL; - if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) - { - if (!ret && dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct does not contain" - "any other OpenMP construct\n", name); - return ret; - } - else - return NULL; -} - -/* Walker function looking for statements there is no point gridifying (and for - noreturn function calls which we cannot do). Return non-NULL if such a - function is found. */ - -static tree -grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - switch (gimple_code (stmt)) - { - case GIMPLE_CALL: - if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) - { - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - } - break; - - /* We may reduce the following list if we find a way to implement the - clauses, but now there is no point trying further. */ - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_TASK: - case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_SECTIONS: - case GIMPLE_OMP_SECTIONS_SWITCH: - case GIMPLE_OMP_TARGET: - case GIMPLE_OMP_ORDERED: - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - default: - break; - } - return NULL; -} - -/* Examine clauses of omp parallel statement PAR and if any prevents - gridification, issue a missed-optimization diagnostics and return false, - otherwise return true. GRID describes hitherto discovered properties of the - loop that is evaluated for possible gridification. */ - -static bool -grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc) -{ - tree clauses = gimple_omp_parallel_clauses (par); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_THREADS: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "because there is " - "a num_threads clause of the parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (par), - "Parallel construct has a num_threads clause\n"); - } - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction clause" - "is present\n "); - dump_printf_loc (MSG_NOTE, gimple_location (par), - "Parallel construct has a reduction clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - return true; -} - -/* Examine clauses and the body of omp loop statement GFOR and if something - prevents gridification, issue a missed-optimization diagnostics and return - false, otherwise return true. GRID describes hitherto discovered properties - of the loop that is evaluated for possible gridification. */ - -static bool -grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) -{ - if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), - grid)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "loop bounds computation contains a complex " - "statement\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct cannot be analyzed for " - "gridification\n"); - } - return false; - } - - tree clauses = gimple_omp_for_clauses (gfor); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_SCHEDULE: - if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "has a non-automatic schedule clause\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct has a non automatic " - "schedule clause\n"); - } - return false; - } - break; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct has a reduction schedule " - "clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - if (walk_gimple_seq (gimple_omp_body (gfor), - grid_find_ungridifiable_statement, - NULL, &wi)) - { - gimple *bad = (gimple *) wi.info; - if (dump_enabled_p ()) - { - if (is_gimple_call (bad)) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "call to a noreturn function\n"); - else - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "statement %s which cannot be transformed\n", - gimple_code_name[(int) gimple_code (bad)]); - dump_printf_loc (MSG_NOTE, gimple_location (bad), - "This statement cannot be analyzed for " - "gridification\n"); - } - return false; - } - return true; -} - -/* Given distribute omp construct represented by DIST, which in the original - source forms a compound construct with a looping construct, return true if it - can be turned into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. */ - -static bool -grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) -{ - location_t tloc = grid->target_loc; - gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), - grid, "distribute"); - gomp_parallel *par; - if (!stmt - || !(par = dyn_cast <gomp_parallel *> (stmt)) - || !grid_parallel_clauses_gridifiable (par, tloc)) - return false; - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, - "parallel"); - gomp_for *gfor; - if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) - return false; - - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the inner loop is not " - "a simple for loop\n"); - return false; - } - gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - return true; -} - -/* Given an omp loop statement GFOR, return true if it can participate in - tiling gridification, i.e. in one where the distribute and parallel for - loops do not form a compound statement. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. */ - -static bool -grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) -{ - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop is not " - "a simple for loop\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "This statement is not a simple for loop\n"); - } - return false; - } - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - if (gimple_omp_for_collapse (gfor) != grid->collapse) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop does not " - "have use the same collapse clause\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct uses a different collapse clause\n"); - } - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (gfor, &fd, loops); - for (unsigned i = 0; i < grid->collapse; i++) - { - tree itype, type = TREE_TYPE (fd.loops[i].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - tree n1 = fold_convert (itype, fd.loops[i].n1); - tree n2 = fold_convert (itype, fd.loops[i].n2); - tree t = build_int_cst (itype, - (fd.loops[i].cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); - - if (!operand_equal_p (grid->group_sizes[i], t, 0)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute and " - "an internal loop do not agree on tile size\n"); - dump_printf_loc (MSG_NOTE, gimple_location (gfor), - "Loop construct does not seem to loop over " - "a tile size\n"); - } - return false; - } - } - return true; -} - -/* Facing a call to FNDECL in the body of a distribute construct, return true - if we can handle it or false if it precludes gridification. */ - -static bool -grid_call_permissible_in_distribute_p (tree fndecl) -{ - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return true; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if (strstr (name, "omp_") != name) - return false; - - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - return true; - - return false; -} - -/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body - of a distribute construct that is pointed at by GSI, modify it as necessary - for gridification. If the statement itself got removed, return true. */ - -static bool -grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) -{ - gimple *stmt = gsi_stmt (*gsi); - tree fndecl = gimple_call_fndecl (stmt); - gcc_checking_assert (stmt); - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return false; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - { - tree lhs = gimple_call_lhs (stmt); - if (lhs) - { - gassign *assign - = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); - gsi_insert_before (gsi, assign, GSI_SAME_STMT); - } - gsi_remove (gsi, true); - return true; - } - - /* The rest of the omp functions can stay as they are, HSA back-end will - handle them correctly. */ - gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0)); - return false; -} - -/* Given a sequence of statements within a distribute omp construct or a - parallel construct, which in the original source does not form a compound - construct with a looping construct, return true if it does not prevent us - from turning it into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. IN_PARALLEL must be true if seq is within a - parallel construct and flase if it is only within a distribute - construct. */ - -static bool -grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, - bool in_parallel) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid) - || gimple_code (stmt) == GIMPLE_GOTO - || gimple_code (stmt) == GIMPLE_LABEL - || gimple_code (stmt) == GIMPLE_COND) - continue; - else if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), - grid, in_parallel)) - return false; - continue; - } - else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) - { - if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a try..catch region\n"); - dump_printf_loc (MSG_NOTE, gimple_location (try_stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), - grid, in_parallel)) - return false; - if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), - grid, in_parallel)) - return false; - continue; - } - else if (is_gimple_call (stmt)) - { - tree fndecl = gimple_call_fndecl (stmt); - if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) - continue; - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a call\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) - { - if (in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a parallel " - "construct contains another parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This parallel construct is nested in " - "another one\n"); - } - return false; - } - if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) - || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), - grid, true)) - return false; - } - else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) - { - if (!in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a loop " - "construct is not nested within a parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This loop construct is not nested in " - "a parallel construct\n"); - } - return false; - } - if (!grid_gfor_follows_tiling_pattern (gfor, grid)) - return false; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a complex statement\n"); - dump_printf_loc (MSG_NOTE, gimple_location (stmt), - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - } - return true; -} - -/* If TARGET follows a pattern that can be turned into a gridified HSA kernel, - return true, otherwise return false. In the case of success, also fill in - GRID with information describing the kernel grid. */ - -static bool -grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) -{ - if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) - return false; - - location_t tloc = gimple_location (target); - grid->target_loc = tloc; - gimple *stmt - = grid_find_single_omp_among_assignments (gimple_omp_body (target), - grid, "target"); - if (!stmt) - return false; - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - tree group_size = NULL; - if (!teams) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "it does not have a sole teams " - "construct in it.\n"); - return false; - } - - tree clauses = gimple_omp_teams_clauses (teams); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_TEAMS: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct " - "contains a num_teams clause\n "); - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - return false; - - case OMP_CLAUSE_THREAD_LIMIT: - if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) - group_size = OMP_CLAUSE_OPERAND (clauses, 0); - break; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, - "teams"); - if (!stmt) - return false; - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - if (!dist) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct does not " - "have a single distribute construct in it.\n"); - return false; - } - - gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); - - grid->collapse = gimple_omp_for_collapse (dist); - if (grid->collapse > 3) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the distribute construct " - "contains collapse clause with parameter greater " - "than 3\n"); - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *dist_loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - extract_omp_for_data (dist, &fd, dist_loops); - if (fd.chunk_size) - { - if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams " - "thread limit is different from distribute " - "schedule chunk\n"); - return false; - } - group_size = fd.chunk_size; - } - if (group_size && grid->collapse > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be " - "set using thread_limit or schedule clauses " - "when also using a collapse clause greater than 1\n"); - return false; - } - - if (gimple_omp_for_combined_p (dist)) - { - grid->tiling = false; - grid->group_sizes[0] = group_size; - for (unsigned i = 1; i < grid->collapse; i++) - grid->group_sizes[i] = NULL; - return grid_dist_follows_simple_pattern (dist, grid); - } - else - { - grid->tiling = true; - if (group_size) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be set " - "using thread_limit or schedule clauses when " - "distribute and loop constructs do not form " - "one combined construct\n"); - return false; - } - for (unsigned i = 0; i < grid->collapse; i++) - { - if (fd.loops[i].cond_code == GT_EXPR) - grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, - TREE_TYPE (fd.loops[i].step), - fd.loops[i].step); - else - grid->group_sizes[i] = fd.loops[i].step; - } - return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, - false); - } -} - -/* Operand walker, used to remap pre-body declarations according to a hash map - provided in DATA. */ - -static tree -grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) -{ - tree t = *tp; - - if (DECL_P (t) || TYPE_P (t)) - *walk_subtrees = 0; - else - *walk_subtrees = 1; - - if (VAR_P (t)) - { - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - tree *repl = declmap->get (t); - if (repl) - *tp = *repl; - } - return NULL_TREE; -} - -/* Identifiers of segments into which a particular variable should be places - when gridifying. */ - -enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, - GRID_SEGMENT_GLOBAL}; - -/* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial - builtin call into SEQ that will make sure the variable is always considered - address taken. */ - -static void -grid_mark_variable_segment (tree var, enum grid_var_segment segment) -{ - /* Making a non-addressable variables would require that we re-gimplify all - their uses. Fortunately, we do not have to do this because if they are - not addressable, it means they are not used in atomic or parallel - statements and so relaxed GPU consistency rules mean we can just keep them - private. */ - if (!TREE_ADDRESSABLE (var)) - return; - - switch (segment) - { - case GRID_SEGMENT_GROUP: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - case GRID_SEGMENT_GLOBAL: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - default: - gcc_unreachable (); - } - - if (!TREE_STATIC (var)) - { - TREE_STATIC (var) = 1; - varpool_node::finalize_decl (var); - } - -} - -/* Copy leading register-type assignments to local variables in SRC to just - before DST, Creating temporaries, adjusting mapping of operands in WI and - remapping operands as necessary. Add any new temporaries to TGT_BIND. - Return the first statement that does not conform to grid_safe_assignment_p - or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all - variables in traversed bind statements so that they are put into the - appropriate segment. */ - -static gimple * -grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, - gbind *tgt_bind, - enum grid_var_segment var_segment, - struct walk_stmt_info *wi) -{ - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - gimple_stmt_iterator gsi; - for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - gimple *r = grid_copy_leading_local_assignments - (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); - - if (var_segment != GRID_SEGMENT_PRIVATE) - for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, var_segment); - if (r) - return r; - else - continue; - } - if (!grid_safe_assignment_p (stmt, NULL)) - return stmt; - tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); - tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), - TREE_TYPE (lhs)); - DECL_CONTEXT (repl) = current_function_decl; - gimple_bind_append_vars (tgt_bind, repl); - - declmap->put (lhs, repl); - gassign *copy = as_a <gassign *> (gimple_copy (stmt)); - walk_gimple_op (copy, grid_remap_prebody_decls, wi); - gsi_insert_before (dst, copy, GSI_SAME_STMT); - } - return NULL; -} - -/* Statement walker function to make adjustments to statements within the - gridifed kernel copy. */ - -static tree -grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - if (gimple_code (stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) - { - gomp_for *loop = as_a <gomp_for *> (stmt); - tree clauses = gimple_omp_for_clauses (loop); - tree cl = find_omp_clause (clauses, OMP_CLAUSE_SAFELEN); - if (cl) - OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; - else - { - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); - OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; - OMP_CLAUSE_CHAIN (c) = clauses; - gimple_omp_for_set_clauses (loop, c); - } - } - return NULL_TREE; -} - -/* Given a PARLOOP that is a normal for looping construct but also a part of a - combined construct with a simd loop, eliminate the simd loop. */ - -static void -grid_eliminate_combined_simd_part (gomp_for *parloop) -{ - struct walk_stmt_info wi; - - memset (&wi, 0, sizeof (wi)); - wi.val_only = true; - enum gf_mask msk = GF_OMP_FOR_SIMD; - wi.info = (void *) &msk; - walk_gimple_seq (gimple_omp_body (parloop), find_combined_for, NULL, &wi); - gimple *stmt = (gimple *) wi.info; - /* We expect that the SIMD id the only statement in the parallel loop. */ - gcc_assert (stmt - && gimple_code (stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) - && gimple_omp_for_combined_into_p (stmt) - && !gimple_omp_for_combined_p (stmt)); - gomp_for *simd = as_a <gomp_for *> (stmt); - - /* Copy over the iteration properties because the body refers to the index in - the bottmom-most loop. */ - unsigned i, collapse = gimple_omp_for_collapse (parloop); - gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); - for (i = 0; i < collapse; i++) - { - gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); - gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); - gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); - gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); - } - - tree *tgt= gimple_omp_for_clauses_ptr (parloop); - while (*tgt) - tgt = &OMP_CLAUSE_CHAIN (*tgt); - - /* Copy over all clauses, except for linaer clauses, which are turned into - private clauses, and all other simd-specificl clauses, which are - ignored. */ - tree *pc = gimple_omp_for_clauses_ptr (simd); - while (*pc) - { - tree c = *pc; - switch (TREE_CODE (c)) - { - case OMP_CLAUSE_LINEAR: - { - tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); - OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); - OMP_CLAUSE_CHAIN (priv) = NULL; - *tgt = priv; - tgt = &OMP_CLAUSE_CHAIN (priv); - pc = &OMP_CLAUSE_CHAIN (c); - break; - } - - case OMP_CLAUSE_SAFELEN: - case OMP_CLAUSE_SIMDLEN: - case OMP_CLAUSE_ALIGNED: - pc = &OMP_CLAUSE_CHAIN (c); - break; - - default: - *pc = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = NULL; - *tgt = c; - tgt = &OMP_CLAUSE_CHAIN(c); - break; - } - } - - /* Finally, throw away the simd and mark the parallel loop as not - combined. */ - gimple_omp_set_body (parloop, gimple_omp_body (simd)); - gimple_omp_for_set_combined_p (parloop, false); -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. */ - -static tree -grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) - { - *handled_ops_p = true; - gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_intra_group (loop, true); - if (gimple_omp_for_combined_p (loop)) - grid_eliminate_combined_simd_part (loop); - - struct walk_stmt_info body_wi; - memset (&body_wi, 0, sizeof (body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (loop), - grid_process_grid_body, NULL, &body_wi); - - gbind *bind = (gbind *) wi_in->info; - tree c; - for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) - { - push_gimplify_context (); - tree ov = OMP_CLAUSE_DECL (c); - tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), - TREE_TYPE (ov)); - - grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); - DECL_CONTEXT (gv) = current_function_decl; - gimple_bind_append_vars (bind, gv); - tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); - gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); - x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); - gimple_seq l = NULL; - gimplify_and_add (x, &l); - gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); - pop_gimplify_context (bind); - } - } - return NULL_TREE; -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. */ - -static tree -grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - wi_in->removed_stmt = false; - gimple *stmt = gsi_stmt (*gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); - } - else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) - { - *handled_ops_p = true; - gimple_omp_parallel_set_grid_phony (parallel, true); - - gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); - gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); - gimple_seq s = NULL; - gimple_seq_add_stmt (&s, new_bind); - gimple_omp_set_body (parallel, s); - - struct walk_stmt_info wi_par; - memset (&wi_par, 0, sizeof (wi_par)); - wi_par.info = new_bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), - grid_mark_tiling_loops, NULL, &wi_par); - } - else if (is_a <gcall *> (stmt)) - wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); - return NULL_TREE; -} - -/* Given freshly copied top level kernel SEQ, identify the individual OMP - components, mark them as part of kernel, copy assignment leading to them - just before DST, remapping them using WI and adding new temporaries to - TGT_BIND, and and return the loop that will be used for kernel dispatch. */ - -static gomp_for * -grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, - gimple_stmt_iterator *dst, - gbind *tgt_bind, struct walk_stmt_info *wi) -{ - gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, - GRID_SEGMENT_GLOBAL, wi); - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - gcc_assert (teams); - gimple_omp_teams_set_grid_phony (teams, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, - tgt_bind, GRID_SEGMENT_GLOBAL, wi); - gcc_checking_assert (stmt); - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - gcc_assert (dist); - gimple_seq prebody = gimple_omp_for_pre_body (dist); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_GROUP, wi); - - if (grid->tiling) - { - gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_group_iter (dist, true); - - struct walk_stmt_info wi_tiled; - memset (&wi_tiled, 0, sizeof (wi_tiled)); - walk_gimple_seq_mod (gimple_omp_body_ptr (dist), - grid_mark_tiling_parallels_and_loops, NULL, - &wi_tiled); - return dist; - } - else - { - gimple_omp_for_set_grid_phony (dist, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, - tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gcc_checking_assert (stmt); - gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); - gimple_omp_parallel_set_grid_phony (parallel, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), - dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gomp_for *inner_loop = as_a <gomp_for *> (stmt); - gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); - prebody = gimple_omp_for_pre_body (inner_loop); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - - if (gimple_omp_for_combined_p (inner_loop)) - grid_eliminate_combined_simd_part (inner_loop); - struct walk_stmt_info body_wi;; - memset (&body_wi, 0, sizeof (body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), - grid_process_grid_body, NULL, &body_wi); - - return inner_loop; - } -} - -/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, - create a GPU kernel for it. GSI must point to the same statement, TGT_BIND - is the bind into which temporaries inserted before TARGET should be - added. */ - -static void -grid_attempt_target_gridification (gomp_target *target, - gimple_stmt_iterator *gsi, - gbind *tgt_bind) -{ - /* removed group_size */ - grid_prop grid; - memset (&grid, 0, sizeof (grid)); - if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) - return; - - location_t loc = gimple_location (target); - if (dump_enabled_p ()) - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, - "Target construct will be turned into a gridified HSA " - "kernel\n"); - - /* Copy target body to a GPUKERNEL construct: */ - gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals - (gimple_omp_body (target)); - - hash_map<tree, tree> *declmap = new hash_map<tree, tree>; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (struct walk_stmt_info)); - wi.info = declmap; - - /* Copy assignments in between OMP statements before target, mark OMP - statements within copy appropriately. */ - gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, - tgt_bind, &wi); - - gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); - gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); - tree new_block = gimple_bind_block (new_bind); - tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); - BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); - BLOCK_SUBBLOCKS (enc_block) = new_block; - BLOCK_SUPERCONTEXT (new_block) = enc_block; - gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); - gimple_seq_add_stmt - (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), - gpukernel); - - for (size_t i = 0; i < grid.collapse; i++) - walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); - push_gimplify_context (); - for (size_t i = 0; i < grid.collapse; i++) - { - tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i)); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); - tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); - walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); - tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); - walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); - adjust_for_condition (loc, &cond_code, &n2); - n1 = fold_convert (itype, n1); - n2 = fold_convert (itype, n2); - - tree step - = get_omp_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); - - tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - if (grid.tiling) - { - if (cond_code == GT_EXPR) - step = fold_build1 (NEGATE_EXPR, itype, step); - t = fold_build2 (MULT_EXPR, itype, t, step); - } - - tree gs = fold_convert (uint32_type_node, t); - gimple_seq tmpseq = NULL; - gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - - tree ws; - if (grid.group_sizes[i]) - { - ws = fold_convert (uint32_type_node, grid.group_sizes[i]); - tmpseq = NULL; - gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - } - else - ws = build_zero_cst (uint32_type_node); - - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); - OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; - OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; - OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; - OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); - gimple_omp_target_set_clauses (target, c); - } - pop_gimplify_context (tgt_bind); - delete declmap; - return; -} - -/* Walker function doing all the work for create_target_kernels. */ - -static tree -grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *incoming) -{ - *handled_ops_p = false; - - gimple *stmt = gsi_stmt (*gsi); - gomp_target *target = dyn_cast <gomp_target *> (stmt); - if (target) - { - gbind *tgt_bind = (gbind *) incoming->info; - gcc_checking_assert (tgt_bind); - grid_attempt_target_gridification (target, gsi, tgt_bind); - return NULL_TREE; - } - gbind *bind = dyn_cast <gbind *> (stmt); - if (bind) - { - *handled_ops_p = true; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (bind), - grid_gridify_all_targets_stmt, NULL, &wi); - } - return NULL_TREE; -} - -/* Attempt to gridify all target constructs in BODY_P. All such targets will - have their bodies duplicated, with the new copy being put into a - gimple_omp_grid_body statement. All kernel-related construct within the - grid_body will be marked with phony flags or kernel kinds. Moreover, some - re-structuring is often needed, such as copying pre-bodies before the target - construct so that kernel grid sizes can be computed. */ - -static void -grid_gridify_all_targets (gimple_seq *body_p) -{ - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); -} - - /* Main entry point. */ static unsigned int @@ -18968,7 +8854,7 @@ execute_lower_omp (void) body = gimple_body (current_function_decl); if (hsa_gen_requested_p ()) - grid_gridify_all_targets (&body); + omp_grid_gridify_all_targets (&body); scan_omp (&body, NULL); gcc_assert (taskreg_nesting_level == 0); @@ -19285,163 +9171,6 @@ diagnose_sb_2 (gimple_stmt_iterator *gsi_p, bool *handled_ops_p, return NULL_TREE; } -/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant - GIMPLE_* codes. */ -bool -make_gimple_omp_edges (basic_block bb, struct omp_region **region, - int *region_idx) -{ - gimple *last = last_stmt (bb); - enum gimple_code code = gimple_code (last); - struct omp_region *cur_region = *region; - bool fallthru = false; - - switch (code) - { - case GIMPLE_OMP_PARALLEL: - case GIMPLE_OMP_TASK: - case GIMPLE_OMP_FOR: - case GIMPLE_OMP_SINGLE: - case GIMPLE_OMP_TEAMS: - case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_GRID_BODY: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - break; - - case GIMPLE_OMP_ORDERED: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - if (find_omp_clause (gimple_omp_ordered_clauses - (as_a <gomp_ordered *> (last)), - OMP_CLAUSE_DEPEND)) - cur_region = cur_region->outer; - break; - - case GIMPLE_OMP_TARGET: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - switch (gimple_omp_target_kind (last)) - { - case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: - case GF_OMP_TARGET_KIND_OACC_PARALLEL: - case GF_OMP_TARGET_KIND_OACC_KERNELS: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - break; - case GF_OMP_TARGET_KIND_UPDATE: - case GF_OMP_TARGET_KIND_ENTER_DATA: - case GF_OMP_TARGET_KIND_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_UPDATE: - case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: - case GF_OMP_TARGET_KIND_OACC_DECLARE: - cur_region = cur_region->outer; - break; - default: - gcc_unreachable (); - } - break; - - case GIMPLE_OMP_SECTIONS: - cur_region = new_omp_region (bb, code, cur_region); - fallthru = true; - break; - - case GIMPLE_OMP_SECTIONS_SWITCH: - fallthru = false; - break; - - case GIMPLE_OMP_ATOMIC_LOAD: - case GIMPLE_OMP_ATOMIC_STORE: - fallthru = true; - break; - - case GIMPLE_OMP_RETURN: - /* In the case of a GIMPLE_OMP_SECTION, the edge will go - somewhere other than the next block. This will be - created later. */ - cur_region->exit = bb; - if (cur_region->type == GIMPLE_OMP_TASK) - /* Add an edge corresponding to not scheduling the task - immediately. */ - make_edge (cur_region->entry, bb, EDGE_ABNORMAL); - fallthru = cur_region->type != GIMPLE_OMP_SECTION; - cur_region = cur_region->outer; - break; - - case GIMPLE_OMP_CONTINUE: - cur_region->cont = bb; - switch (cur_region->type) - { - case GIMPLE_OMP_FOR: - /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE - succs edges as abnormal to prevent splitting - them. */ - single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL; - /* Make the loopback edge. */ - make_edge (bb, single_succ (cur_region->entry), - EDGE_ABNORMAL); - - /* Create an edge from GIMPLE_OMP_FOR to exit, which - corresponds to the case that the body of the loop - is not executed at all. */ - make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL); - make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL); - fallthru = false; - break; - - case GIMPLE_OMP_SECTIONS: - /* Wire up the edges into and out of the nested sections. */ - { - basic_block switch_bb = single_succ (cur_region->entry); - - struct omp_region *i; - for (i = cur_region->inner; i ; i = i->next) - { - gcc_assert (i->type == GIMPLE_OMP_SECTION); - make_edge (switch_bb, i->entry, 0); - make_edge (i->exit, bb, EDGE_FALLTHRU); - } - - /* Make the loopback edge to the block with - GIMPLE_OMP_SECTIONS_SWITCH. */ - make_edge (bb, switch_bb, 0); - - /* Make the edge from the switch to exit. */ - make_edge (switch_bb, bb->next_bb, 0); - fallthru = false; - } - break; - - case GIMPLE_OMP_TASK: - fallthru = true; - break; - - default: - gcc_unreachable (); - } - break; - - default: - gcc_unreachable (); - } - - if (*region != cur_region) - { - *region = cur_region; - if (cur_region) - *region_idx = cur_region->entry->index; - else - *region_idx = 0; - } - - return fallthru; -} - static unsigned int diagnose_omp_structured_block_errors (void) { @@ -19507,1623 +9236,5 @@ make_pass_diagnose_omp_blocks (gcc::context *ctxt) return new pass_diagnose_omp_blocks (ctxt); } -/* Helper function for omp_finish_file routine. Takes decls from V_DECLS and - adds their addresses and sizes to constructor-vector V_CTOR. */ -static void -add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls, - vec<constructor_elt, va_gc> *v_ctor) -{ - unsigned len = vec_safe_length (v_decls); - for (unsigned i = 0; i < len; i++) - { - tree it = (*v_decls)[i]; - bool is_var = VAR_P (it); - bool is_link_var - = is_var -#ifdef ACCEL_COMPILER - && DECL_HAS_VALUE_EXPR_P (it) -#endif - && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it)); - - tree size = NULL_TREE; - if (is_var) - size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it)); - - tree addr; - if (!is_link_var) - addr = build_fold_addr_expr (it); - else - { -#ifdef ACCEL_COMPILER - /* For "omp declare target link" vars add address of the pointer to - the target table, instead of address of the var. */ - tree value_expr = DECL_VALUE_EXPR (it); - tree link_ptr_decl = TREE_OPERAND (value_expr, 0); - varpool_node::finalize_decl (link_ptr_decl); - addr = build_fold_addr_expr (link_ptr_decl); -#else - addr = build_fold_addr_expr (it); -#endif - - /* Most significant bit of the size marks "omp declare target link" - vars in host and target tables. */ - unsigned HOST_WIDE_INT isize = tree_to_uhwi (size); - isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node) - * BITS_PER_UNIT - 1); - size = wide_int_to_tree (const_ptr_type_node, isize); - } - - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr); - if (is_var) - CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size); - } -} - -/* Create new symbols containing (address, size) pairs for global variables, - marked with "omp declare target" attribute, as well as addresses for the - functions, which are outlined offloading regions. */ -void -omp_finish_file (void) -{ - unsigned num_funcs = vec_safe_length (offload_funcs); - unsigned num_vars = vec_safe_length (offload_vars); - - if (num_funcs == 0 && num_vars == 0) - return; - - if (targetm_common.have_named_sections) - { - vec<constructor_elt, va_gc> *v_f, *v_v; - vec_alloc (v_f, num_funcs); - vec_alloc (v_v, num_vars * 2); - - add_decls_addresses_to_decl_constructor (offload_funcs, v_f); - add_decls_addresses_to_decl_constructor (offload_vars, v_v); - - tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node, - num_vars * 2); - tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node, - num_funcs); - SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node)); - SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node)); - tree ctor_v = build_constructor (vars_decl_type, v_v); - tree ctor_f = build_constructor (funcs_decl_type, v_f); - TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1; - TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1; - tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (".offload_func_table"), - funcs_decl_type); - tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (".offload_var_table"), - vars_decl_type); - TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1; - /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node), - otherwise a joint table in a binary will contain padding between - tables from multiple object files. */ - DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1; - SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type)); - SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type)); - DECL_INITIAL (funcs_decl) = ctor_f; - DECL_INITIAL (vars_decl) = ctor_v; - set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME); - set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME); - - varpool_node::finalize_decl (vars_decl); - varpool_node::finalize_decl (funcs_decl); - } - else - { - for (unsigned i = 0; i < num_funcs; i++) - { - tree it = (*offload_funcs)[i]; - targetm.record_offload_symbol (it); - } - for (unsigned i = 0; i < num_vars; i++) - { - tree it = (*offload_vars)[i]; - targetm.record_offload_symbol (it); - } - } -} - -/* Find the number of threads (POS = false), or thread number (POS = - true) for an OpenACC region partitioned as MASK. Setup code - required for the calculation is added to SEQ. */ - -static tree -oacc_thread_numbers (bool pos, int mask, gimple_seq *seq) -{ - tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1); - unsigned ix; - - /* Start at gang level, and examine relevant dimension indices. */ - for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) - if (GOMP_DIM_MASK (ix) & mask) - { - tree arg = build_int_cst (unsigned_type_node, ix); - - if (res) - { - /* We had an outer index, so scale that by the size of - this dimension. */ - tree n = create_tmp_var (integer_type_node); - gimple *call - = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); - - gimple_call_set_lhs (call, n); - gimple_seq_add_stmt (seq, call); - res = fold_build2 (MULT_EXPR, integer_type_node, res, n); - } - if (pos) - { - /* Determine index in this dimension. */ - tree id = create_tmp_var (integer_type_node); - gimple *call = gimple_build_call_internal - (IFN_GOACC_DIM_POS, 1, arg); - - gimple_call_set_lhs (call, id); - gimple_seq_add_stmt (seq, call); - if (res) - res = fold_build2 (PLUS_EXPR, integer_type_node, res, id); - else - res = id; - } - } - - if (res == NULL_TREE) - res = integer_zero_node; - - return res; -} - -/* Transform IFN_GOACC_LOOP calls to actual code. See - expand_oacc_for for where these are generated. At the vector - level, we stride loops, such that each member of a warp will - operate on adjacent iterations. At the worker and gang level, - each gang/warp executes a set of contiguous iterations. Chunking - can override this such that each iteration engine executes a - contiguous chunk, and then moves on to stride to the next chunk. */ - -static void -oacc_xform_loop (gcall *call) -{ - gimple_stmt_iterator gsi = gsi_for_stmt (call); - enum ifn_goacc_loop_kind code - = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0)); - tree dir = gimple_call_arg (call, 1); - tree range = gimple_call_arg (call, 2); - tree step = gimple_call_arg (call, 3); - tree chunk_size = NULL_TREE; - unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5)); - tree lhs = gimple_call_lhs (call); - tree type = TREE_TYPE (lhs); - tree diff_type = TREE_TYPE (range); - tree r = NULL_TREE; - gimple_seq seq = NULL; - bool chunking = false, striding = true; - unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning - unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any) - -#ifdef ACCEL_COMPILER - chunk_size = gimple_call_arg (call, 4); - if (integer_minus_onep (chunk_size) /* Force static allocation. */ - || integer_zerop (chunk_size)) /* Default (also static). */ - { - /* If we're at the gang level, we want each to execute a - contiguous run of iterations. Otherwise we want each element - to stride. */ - striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG)); - chunking = false; - } - else - { - /* Chunk of size 1 is striding. */ - striding = integer_onep (chunk_size); - chunking = !striding; - } -#endif - - /* striding=true, chunking=true - -> invalid. - striding=true, chunking=false - -> chunks=1 - striding=false,chunking=true - -> chunks=ceil (range/(chunksize*threads*step)) - striding=false,chunking=false - -> chunk_size=ceil(range/(threads*step)),chunks=1 */ - push_gimplify_context (true); - - switch (code) - { - default: gcc_unreachable (); - - case IFN_GOACC_LOOP_CHUNKS: - if (!chunking) - r = build_int_cst (type, 1); - else - { - /* chunk_max - = (range - dir) / (chunks * step * num_threads) + dir */ - tree per = oacc_thread_numbers (false, mask, &seq); - per = fold_convert (type, per); - chunk_size = fold_convert (type, chunk_size); - per = fold_build2 (MULT_EXPR, type, per, chunk_size); - per = fold_build2 (MULT_EXPR, type, per, step); - r = build2 (MINUS_EXPR, type, range, dir); - r = build2 (PLUS_EXPR, type, r, per); - r = build2 (TRUNC_DIV_EXPR, type, r, per); - } - break; - - case IFN_GOACC_LOOP_STEP: - { - /* If striding, step by the entire compute volume, otherwise - step by the inner volume. */ - unsigned volume = striding ? mask : inner_mask; - - r = oacc_thread_numbers (false, volume, &seq); - r = build2 (MULT_EXPR, type, fold_convert (type, r), step); - } - break; - - case IFN_GOACC_LOOP_OFFSET: - if (striding) - { - r = oacc_thread_numbers (true, mask, &seq); - r = fold_convert (diff_type, r); - } - else - { - tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); - tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); - tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), - inner_size, outer_size); - - volume = fold_convert (diff_type, volume); - if (chunking) - chunk_size = fold_convert (diff_type, chunk_size); - else - { - tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); - - chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); - chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); - chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); - } - - tree span = build2 (MULT_EXPR, diff_type, chunk_size, - fold_convert (diff_type, inner_size)); - r = oacc_thread_numbers (true, outer_mask, &seq); - r = fold_convert (diff_type, r); - r = build2 (MULT_EXPR, diff_type, r, span); - - tree inner = oacc_thread_numbers (true, inner_mask, &seq); - inner = fold_convert (diff_type, inner); - r = fold_build2 (PLUS_EXPR, diff_type, r, inner); - - if (chunking) - { - tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6)); - tree per - = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size); - per = build2 (MULT_EXPR, diff_type, per, chunk); - - r = build2 (PLUS_EXPR, diff_type, r, per); - } - } - r = fold_build2 (MULT_EXPR, diff_type, r, step); - if (type != diff_type) - r = fold_convert (type, r); - break; - - case IFN_GOACC_LOOP_BOUND: - if (striding) - r = range; - else - { - tree inner_size = oacc_thread_numbers (false, inner_mask, &seq); - tree outer_size = oacc_thread_numbers (false, outer_mask, &seq); - tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size), - inner_size, outer_size); - - volume = fold_convert (diff_type, volume); - if (chunking) - chunk_size = fold_convert (diff_type, chunk_size); - else - { - tree per = fold_build2 (MULT_EXPR, diff_type, volume, step); - - chunk_size = build2 (MINUS_EXPR, diff_type, range, dir); - chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per); - chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per); - } - - tree span = build2 (MULT_EXPR, diff_type, chunk_size, - fold_convert (diff_type, inner_size)); - - r = fold_build2 (MULT_EXPR, diff_type, span, step); - - tree offset = gimple_call_arg (call, 6); - r = build2 (PLUS_EXPR, diff_type, r, - fold_convert (diff_type, offset)); - r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, - diff_type, r, range); - } - if (diff_type != type) - r = fold_convert (type, r); - break; - } - - gimplify_assign (lhs, r, &seq); - - pop_gimplify_context (NULL); - - gsi_replace_with_seq (&gsi, seq, true); -} - -/* Default partitioned and minimum partitioned dimensions. */ - -static int oacc_default_dims[GOMP_DIM_MAX]; -static int oacc_min_dims[GOMP_DIM_MAX]; - -/* Parse the default dimension parameter. This is a set of - :-separated optional compute dimensions. Each specified dimension - is a positive integer. When device type support is added, it is - planned to be a comma separated list of such compute dimensions, - with all but the first prefixed by the colon-terminated device - type. */ - -static void -oacc_parse_default_dims (const char *dims) -{ - int ix; - - for (ix = GOMP_DIM_MAX; ix--;) - { - oacc_default_dims[ix] = -1; - oacc_min_dims[ix] = 1; - } - -#ifndef ACCEL_COMPILER - /* Cannot be overridden on the host. */ - dims = NULL; -#endif - if (dims) - { - const char *pos = dims; - - for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++) - { - if (ix) - { - if (*pos != ':') - goto malformed; - pos++; - } - - if (*pos != ':') - { - long val; - const char *eptr; - - errno = 0; - val = strtol (pos, CONST_CAST (char **, &eptr), 10); - if (errno || val <= 0 || (int) val != val) - goto malformed; - pos = eptr; - oacc_default_dims[ix] = (int) val; - } - } - if (*pos) - { - malformed: - error_at (UNKNOWN_LOCATION, - "-fopenacc-dim operand is malformed at '%s'", pos); - } - } - - /* Allow the backend to validate the dimensions. */ - targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1); - targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2); -} - -/* Validate and update the dimensions for offloaded FN. ATTRS is the - raw attribute. DIMS is an array of dimensions, which is filled in. - LEVEL is the partitioning level of a routine, or -1 for an offload - region itself. USED is the mask of partitioned execution in the - function. */ - -static void -oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used) -{ - tree purpose[GOMP_DIM_MAX]; - unsigned ix; - tree pos = TREE_VALUE (attrs); - bool is_kernel = oacc_fn_attrib_kernels_p (attrs); - - /* Make sure the attribute creator attached the dimension - information. */ - gcc_assert (pos); - - for (ix = 0; ix != GOMP_DIM_MAX; ix++) - { - purpose[ix] = TREE_PURPOSE (pos); - tree val = TREE_VALUE (pos); - dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; - pos = TREE_CHAIN (pos); - } - - bool changed = targetm.goacc.validate_dims (fn, dims, level); - - /* Default anything left to 1 or a partitioned default. */ - for (ix = 0; ix != GOMP_DIM_MAX; ix++) - if (dims[ix] < 0) - { - /* The OpenACC spec says 'If the [num_gangs] clause is not - specified, an implementation-defined default will be used; - the default may depend on the code within the construct.' - (2.5.6). Thus an implementation is free to choose - non-unity default for a parallel region that doesn't have - any gang-partitioned loops. However, it appears that there - is a sufficient body of user code that expects non-gang - partitioned regions to not execute in gang-redundant mode. - So we (a) don't warn about the non-portability and (b) pick - the minimum permissible dimension size when there is no - partitioned execution. Otherwise we pick the global - default for the dimension, which the user can control. The - same wording and logic applies to num_workers and - vector_length, however the worker- or vector- single - execution doesn't have the same impact as gang-redundant - execution. (If the minimum gang-level partioning is not 1, - the target is probably too confusing.) */ - dims[ix] = (used & GOMP_DIM_MASK (ix) - ? oacc_default_dims[ix] : oacc_min_dims[ix]); - changed = true; - } - - if (changed) - { - /* Replace the attribute with new values. */ - pos = NULL_TREE; - for (ix = GOMP_DIM_MAX; ix--;) - { - pos = tree_cons (purpose[ix], - build_int_cst (integer_type_node, dims[ix]), - pos); - if (is_kernel) - TREE_PUBLIC (pos) = 1; - } - replace_oacc_fn_attrib (fn, pos); - } -} - -/* Create an empty OpenACC loop structure at LOC. */ - -static oacc_loop * -new_oacc_loop_raw (oacc_loop *parent, location_t loc) -{ - oacc_loop *loop = XCNEW (oacc_loop); - - loop->parent = parent; - loop->child = loop->sibling = NULL; - - if (parent) - { - loop->sibling = parent->child; - parent->child = loop; - } - - loop->loc = loc; - loop->marker = NULL; - memset (loop->heads, 0, sizeof (loop->heads)); - memset (loop->tails, 0, sizeof (loop->tails)); - loop->routine = NULL_TREE; - - loop->mask = loop->flags = loop->inner = 0; - loop->ifns = 0; - loop->chunk_size = 0; - loop->head_end = NULL; - - return loop; -} - -/* Create an outermost, dummy OpenACC loop for offloaded function - DECL. */ - -static oacc_loop * -new_oacc_loop_outer (tree decl) -{ - return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl)); -} - -/* Start a new OpenACC loop structure beginning at head marker HEAD. - Link into PARENT loop. Return the new loop. */ - -static oacc_loop * -new_oacc_loop (oacc_loop *parent, gcall *marker) -{ - oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker)); - - loop->marker = marker; - - /* TODO: This is where device_type flattening would occur for the loop - flags. */ - - loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3)); - - tree chunk_size = integer_zero_node; - if (loop->flags & OLF_GANG_STATIC) - chunk_size = gimple_call_arg (marker, 4); - loop->chunk_size = chunk_size; - - return loop; -} - -/* Create a dummy loop encompassing a call to a openACC routine. - Extract the routine's partitioning requirements. */ - -static void -new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs) -{ - oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call)); - int level = oacc_fn_attrib_level (attrs); - - gcc_assert (level >= 0); - - loop->marker = call; - loop->routine = decl; - loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) - ^ (GOMP_DIM_MASK (level) - 1)); -} - -/* Finish off the current OpenACC loop ending at tail marker TAIL. - Return the parent loop. */ - -static oacc_loop * -finish_oacc_loop (oacc_loop *loop) -{ - /* If the loop has been collapsed, don't partition it. */ - if (!loop->ifns) - loop->mask = loop->flags = 0; - return loop->parent; -} - -/* Free all OpenACC loop structures within LOOP (inclusive). */ - -static void -free_oacc_loop (oacc_loop *loop) -{ - if (loop->sibling) - free_oacc_loop (loop->sibling); - if (loop->child) - free_oacc_loop (loop->child); - - free (loop); -} - -/* Dump out the OpenACC loop head or tail beginning at FROM. */ - -static void -dump_oacc_loop_part (FILE *file, gcall *from, int depth, - const char *title, int level) -{ - enum ifn_unique_kind kind - = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); - - fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level); - for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) - { - gimple *stmt = gsi_stmt (gsi); - - if (gimple_call_internal_p (stmt, IFN_UNIQUE)) - { - enum ifn_unique_kind k - = ((enum ifn_unique_kind) TREE_INT_CST_LOW - (gimple_call_arg (stmt, 0))); - - if (k == kind && stmt != from) - break; - } - print_gimple_stmt (file, stmt, depth * 2 + 2, 0); - - gsi_next (&gsi); - while (gsi_end_p (gsi)) - gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); - } -} - -/* Dump OpenACC loops LOOP, its siblings and its children. */ - -static void -dump_oacc_loop (FILE *file, oacc_loop *loop, int depth) -{ - int ix; - - fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "", - loop->flags, loop->mask, - LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc)); - - if (loop->marker) - print_gimple_stmt (file, loop->marker, depth * 2, 0); - - if (loop->routine) - fprintf (file, "%*sRoutine %s:%u:%s\n", - depth * 2, "", DECL_SOURCE_FILE (loop->routine), - DECL_SOURCE_LINE (loop->routine), - IDENTIFIER_POINTER (DECL_NAME (loop->routine))); - - for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) - if (loop->heads[ix]) - dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix); - for (ix = GOMP_DIM_MAX; ix--;) - if (loop->tails[ix]) - dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix); - - if (loop->child) - dump_oacc_loop (file, loop->child, depth + 1); - if (loop->sibling) - dump_oacc_loop (file, loop->sibling, depth); -} - -void debug_oacc_loop (oacc_loop *); - -/* Dump loops to stderr. */ - -DEBUG_FUNCTION void -debug_oacc_loop (oacc_loop *loop) -{ - dump_oacc_loop (stderr, loop, 0); -} - -/* DFS walk of basic blocks BB onwards, creating OpenACC loop - structures as we go. By construction these loops are properly - nested. */ - -static void -oacc_loop_discover_walk (oacc_loop *loop, basic_block bb) -{ - int marker = 0; - int remaining = 0; - - if (bb->flags & BB_VISITED) - return; - - follow: - bb->flags |= BB_VISITED; - - /* Scan for loop markers. */ - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (!is_gimple_call (stmt)) - continue; - - gcall *call = as_a <gcall *> (stmt); - - /* If this is a routine, make a dummy loop for it. */ - if (tree decl = gimple_call_fndecl (call)) - if (tree attrs = get_oacc_fn_attrib (decl)) - { - gcc_assert (!marker); - new_oacc_loop_routine (loop, call, decl, attrs); - } - - if (!gimple_call_internal_p (call)) - continue; - - switch (gimple_call_internal_fn (call)) - { - default: - break; - - case IFN_GOACC_LOOP: - /* Count the goacc loop abstraction fns, to determine if the - loop was collapsed already. */ - loop->ifns++; - break; - - case IFN_UNIQUE: - enum ifn_unique_kind kind - = (enum ifn_unique_kind) (TREE_INT_CST_LOW - (gimple_call_arg (call, 0))); - if (kind == IFN_UNIQUE_OACC_HEAD_MARK - || kind == IFN_UNIQUE_OACC_TAIL_MARK) - { - if (gimple_call_num_args (call) == 2) - { - gcc_assert (marker && !remaining); - marker = 0; - if (kind == IFN_UNIQUE_OACC_TAIL_MARK) - loop = finish_oacc_loop (loop); - else - loop->head_end = call; - } - else - { - int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); - - if (!marker) - { - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) - loop = new_oacc_loop (loop, call); - remaining = count; - } - gcc_assert (count == remaining); - if (remaining) - { - remaining--; - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) - loop->heads[marker] = call; - else - loop->tails[remaining] = call; - } - marker++; - } - } - } - } - if (remaining || marker) - { - bb = single_succ (bb); - gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED)); - goto follow; - } - - /* Walk successor blocks. */ - edge e; - edge_iterator ei; - - FOR_EACH_EDGE (e, ei, bb->succs) - oacc_loop_discover_walk (loop, e->dest); -} - -/* LOOP is the first sibling. Reverse the order in place and return - the new first sibling. Recurse to child loops. */ - -static oacc_loop * -oacc_loop_sibling_nreverse (oacc_loop *loop) -{ - oacc_loop *last = NULL; - do - { - if (loop->child) - loop->child = oacc_loop_sibling_nreverse (loop->child); - - oacc_loop *next = loop->sibling; - loop->sibling = last; - last = loop; - loop = next; - } - while (loop); - - return last; -} - -/* Discover the OpenACC loops marked up by HEAD and TAIL markers for - the current function. */ - -static oacc_loop * -oacc_loop_discovery () -{ - /* Clear basic block flags, in particular BB_VISITED which we're going to use - in the following. */ - clear_bb_flags (); - - oacc_loop *top = new_oacc_loop_outer (current_function_decl); - oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun)); - - /* The siblings were constructed in reverse order, reverse them so - that diagnostics come out in an unsurprising order. */ - top = oacc_loop_sibling_nreverse (top); - - return top; -} - -/* Transform the abstract internal function markers starting at FROM - to be for partitioning level LEVEL. Stop when we meet another HEAD - or TAIL marker. */ - -static void -oacc_loop_xform_head_tail (gcall *from, int level) -{ - enum ifn_unique_kind kind - = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0)); - tree replacement = build_int_cst (unsigned_type_node, level); - - for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;) - { - gimple *stmt = gsi_stmt (gsi); - - if (gimple_call_internal_p (stmt, IFN_UNIQUE)) - { - enum ifn_unique_kind k - = ((enum ifn_unique_kind) - TREE_INT_CST_LOW (gimple_call_arg (stmt, 0))); - - if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN) - *gimple_call_arg_ptr (stmt, 2) = replacement; - else if (k == kind && stmt != from) - break; - } - else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)) - *gimple_call_arg_ptr (stmt, 3) = replacement; - - gsi_next (&gsi); - while (gsi_end_p (gsi)) - gsi = gsi_start_bb (single_succ (gsi_bb (gsi))); - } -} - -/* Transform the IFN_GOACC_LOOP internal functions by providing the - determined partitioning mask and chunking argument. END_MARKER - points at the end IFN_HEAD_TAIL call intgroducing the loop. IFNS - is the number of IFN_GOACC_LOOP calls for the loop. MASK_ARG is - the replacement partitioning mask and CHUNK_ARG is the replacement - chunking arg. */ - -static void -oacc_loop_xform_loop (gcall *end_marker, unsigned ifns, - tree mask_arg, tree chunk_arg) -{ - gimple_stmt_iterator gsi = gsi_for_stmt (end_marker); - - gcc_checking_assert (ifns); - for (;;) - { - for (; !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (!is_gimple_call (stmt)) - continue; - - gcall *call = as_a <gcall *> (stmt); - - if (!gimple_call_internal_p (call)) - continue; - - if (gimple_call_internal_fn (call) != IFN_GOACC_LOOP) - continue; - - *gimple_call_arg_ptr (call, 5) = mask_arg; - *gimple_call_arg_ptr (call, 4) = chunk_arg; - ifns--; - if (!ifns) - return; - } - - /* The LOOP_BOUND ifn could be in the single successor - block. */ - basic_block bb = single_succ (gsi_bb (gsi)); - gsi = gsi_start_bb (bb); - } -} - -/* Process the discovered OpenACC loops, setting the correct - partitioning level etc. */ - -static void -oacc_loop_process (oacc_loop *loop) -{ - if (loop->child) - oacc_loop_process (loop->child); - - if (loop->mask && !loop->routine) - { - int ix; - unsigned mask = loop->mask; - unsigned dim = GOMP_DIM_GANG; - tree mask_arg = build_int_cst (unsigned_type_node, mask); - tree chunk_arg = loop->chunk_size; - - oacc_loop_xform_loop (loop->head_end, loop->ifns, mask_arg, chunk_arg); - - for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++) - { - while (!(GOMP_DIM_MASK (dim) & mask)) - dim++; - - oacc_loop_xform_head_tail (loop->heads[ix], dim); - oacc_loop_xform_head_tail (loop->tails[ix], dim); - - mask ^= GOMP_DIM_MASK (dim); - } - } - - if (loop->sibling) - oacc_loop_process (loop->sibling); -} - -/* Walk the OpenACC loop heirarchy checking and assigning the - programmer-specified partitionings. OUTER_MASK is the partitioning - this loop is contained within. Return mask of partitioning - encountered. If any auto loops are discovered, set GOMP_DIM_MAX - bit. */ - -static unsigned -oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) -{ - unsigned this_mask = loop->mask; - unsigned mask_all = 0; - bool noisy = true; - -#ifdef ACCEL_COMPILER - /* When device_type is supported, we want the device compiler to be - noisy, if the loop parameters are device_type-specific. */ - noisy = false; -#endif - - if (!loop->routine) - { - bool auto_par = (loop->flags & OLF_AUTO) != 0; - bool seq_par = (loop->flags & OLF_SEQ) != 0; - - this_mask = ((loop->flags >> OLF_DIM_BASE) - & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)); - - if ((this_mask != 0) + auto_par + seq_par > 1) - { - if (noisy) - error_at (loop->loc, - seq_par - ? "%<seq%> overrides other OpenACC loop specifiers" - : "%<auto%> conflicts with other OpenACC loop specifiers"); - auto_par = false; - loop->flags &= ~OLF_AUTO; - if (seq_par) - { - loop->flags &= - ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE); - this_mask = 0; - } - } - if (auto_par && (loop->flags & OLF_INDEPENDENT)) - mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); - } - - if (this_mask & outer_mask) - { - const oacc_loop *outer; - for (outer = loop->parent; outer; outer = outer->parent) - if (outer->mask & this_mask) - break; - - if (noisy) - { - if (outer) - { - error_at (loop->loc, - "%s uses same OpenACC parallelism as containing loop", - loop->routine ? "routine call" : "inner loop"); - inform (outer->loc, "containing loop here"); - } - else - error_at (loop->loc, - "%s uses OpenACC parallelism disallowed by containing routine", - loop->routine ? "routine call" : "loop"); - - if (loop->routine) - inform (DECL_SOURCE_LOCATION (loop->routine), - "routine %qD declared here", loop->routine); - } - this_mask &= ~outer_mask; - } - else - { - unsigned outermost = least_bit_hwi (this_mask); - - if (outermost && outermost <= outer_mask) - { - if (noisy) - { - error_at (loop->loc, - "incorrectly nested OpenACC loop parallelism"); - - const oacc_loop *outer; - for (outer = loop->parent; - outer->flags && outer->flags < outermost; - outer = outer->parent) - continue; - inform (outer->loc, "containing loop here"); - } - - this_mask &= ~outermost; - } - } - - loop->mask = this_mask; - mask_all |= this_mask; - - if (loop->child) - { - loop->inner = oacc_loop_fixed_partitions (loop->child, - outer_mask | this_mask); - mask_all |= loop->inner; - } - - if (loop->sibling) - mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); - - return mask_all; -} - -/* Walk the OpenACC loop heirarchy to assign auto-partitioned loops. - OUTER_MASK is the partitioning this loop is contained within. - Return the cumulative partitioning used by this loop, siblings and - children. */ - -static unsigned -oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) -{ - bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT); - bool noisy = true; - -#ifdef ACCEL_COMPILER - /* When device_type is supported, we want the device compiler to be - noisy, if the loop parameters are device_type-specific. */ - noisy = false; -#endif - - if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1)) - { - /* Allocate the outermost loop at the outermost available - level. */ - unsigned this_mask = outer_mask + 1; - - if (!(this_mask & loop->inner)) - loop->mask = this_mask; - } - - if (loop->child) - { - unsigned child_mask = outer_mask | loop->mask; - - if (loop->mask || assign) - child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX); - - loop->inner = oacc_loop_auto_partitions (loop->child, child_mask); - } - - if (assign && !loop->mask) - { - /* Allocate the loop at the innermost available level. */ - unsigned this_mask = 0; - - /* Determine the outermost partitioning used within this loop. */ - this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX); - this_mask = least_bit_hwi (this_mask); - - /* Pick the partitioning just inside that one. */ - this_mask >>= 1; - - /* And avoid picking one use by an outer loop. */ - this_mask &= ~outer_mask; - - if (!this_mask && noisy) - warning_at (loop->loc, 0, - "insufficient partitioning available to parallelize loop"); - - loop->mask = this_mask; - } - - if (assign && dump_file) - fprintf (dump_file, "Auto loop %s:%d assigned %d\n", - LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc), - loop->mask); - - unsigned inner_mask = 0; - - if (loop->sibling) - inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask); - - inner_mask |= loop->inner | loop->mask; - - return inner_mask; -} - -/* Walk the OpenACC loop heirarchy to check and assign partitioning - axes. Return mask of partitioning. */ - -static unsigned -oacc_loop_partition (oacc_loop *loop, unsigned outer_mask) -{ - unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask); - - if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX)) - { - mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX); - mask_all |= oacc_loop_auto_partitions (loop, outer_mask); - } - return mask_all; -} - -/* Default fork/join early expander. Delete the function calls if - there is no RTL expander. */ - -bool -default_goacc_fork_join (gcall *ARG_UNUSED (call), - const int *ARG_UNUSED (dims), bool is_fork) -{ - if (is_fork) - return targetm.have_oacc_fork (); - else - return targetm.have_oacc_join (); -} - -/* Default goacc.reduction early expander. - - LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET) - If RES_PTR is not integer-zerop: - SETUP - emit 'LHS = *RES_PTR', LHS = NULL - TEARDOWN - emit '*RES_PTR = VAR' - If LHS is not NULL - emit 'LHS = VAR' */ - -void -default_goacc_reduction (gcall *call) -{ - unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0)); - gimple_stmt_iterator gsi = gsi_for_stmt (call); - tree lhs = gimple_call_lhs (call); - tree var = gimple_call_arg (call, 2); - gimple_seq seq = NULL; - - if (code == IFN_GOACC_REDUCTION_SETUP - || code == IFN_GOACC_REDUCTION_TEARDOWN) - { - /* Setup and Teardown need to copy from/to the receiver object, - if there is one. */ - tree ref_to_res = gimple_call_arg (call, 1); - - if (!integer_zerop (ref_to_res)) - { - tree dst = build_simple_mem_ref (ref_to_res); - tree src = var; - - if (code == IFN_GOACC_REDUCTION_SETUP) - { - src = dst; - dst = lhs; - lhs = NULL; - } - gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src)); - } - } - - /* Copy VAR to LHS, if there is an LHS. */ - if (lhs) - gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var)); - - gsi_replace_with_seq (&gsi, seq, true); -} - -/* Main entry point for oacc transformations which run on the device - compiler after LTO, so we know what the target device is at this - point (including the host fallback). */ - -static unsigned int -execute_oacc_device_lower () -{ - tree attrs = get_oacc_fn_attrib (current_function_decl); - - if (!attrs) - /* Not an offloaded function. */ - return 0; - - /* Parse the default dim argument exactly once. */ - if ((const void *)flag_openacc_dims != &flag_openacc_dims) - { - oacc_parse_default_dims (flag_openacc_dims); - flag_openacc_dims = (char *)&flag_openacc_dims; - } - - /* Discover, partition and process the loops. */ - oacc_loop *loops = oacc_loop_discovery (); - int fn_level = oacc_fn_attrib_level (attrs); - - if (dump_file) - fprintf (dump_file, oacc_fn_attrib_kernels_p (attrs) - ? "Function is kernels offload\n" - : fn_level < 0 ? "Function is parallel offload\n" - : "Function is routine level %d\n", fn_level); - - unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0; - unsigned used_mask = oacc_loop_partition (loops, outer_mask); - int dims[GOMP_DIM_MAX]; - - oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask); - - if (dump_file) - { - const char *comma = "Compute dimensions ["; - for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ") - fprintf (dump_file, "%s%d", comma, dims[ix]); - fprintf (dump_file, "]\n"); - } - - oacc_loop_process (loops); - if (dump_file) - { - fprintf (dump_file, "OpenACC loops\n"); - dump_oacc_loop (dump_file, loops, 0); - fprintf (dump_file, "\n"); - } - - /* Offloaded targets may introduce new basic blocks, which require - dominance information to update SSA. */ - calculate_dominance_info (CDI_DOMINATORS); - - /* Now lower internal loop functions to target-specific code - sequences. */ - basic_block bb; - FOR_ALL_BB_FN (bb, cfun) - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);) - { - gimple *stmt = gsi_stmt (gsi); - if (!is_gimple_call (stmt)) - { - gsi_next (&gsi); - continue; - } - - gcall *call = as_a <gcall *> (stmt); - if (!gimple_call_internal_p (call)) - { - gsi_next (&gsi); - continue; - } - - /* Rewind to allow rescan. */ - gsi_prev (&gsi); - bool rescan = false, remove = false; - enum internal_fn ifn_code = gimple_call_internal_fn (call); - - switch (ifn_code) - { - default: break; - - case IFN_GOACC_LOOP: - oacc_xform_loop (call); - rescan = true; - break; - - case IFN_GOACC_REDUCTION: - /* Mark the function for SSA renaming. */ - mark_virtual_operands_for_renaming (cfun); - - /* If the level is -1, this ended up being an unused - axis. Handle as a default. */ - if (integer_minus_onep (gimple_call_arg (call, 3))) - default_goacc_reduction (call); - else - targetm.goacc.reduction (call); - rescan = true; - break; - - case IFN_UNIQUE: - { - enum ifn_unique_kind kind - = ((enum ifn_unique_kind) - TREE_INT_CST_LOW (gimple_call_arg (call, 0))); - - switch (kind) - { - default: - gcc_unreachable (); - - case IFN_UNIQUE_OACC_FORK: - case IFN_UNIQUE_OACC_JOIN: - if (integer_minus_onep (gimple_call_arg (call, 2))) - remove = true; - else if (!targetm.goacc.fork_join - (call, dims, kind == IFN_UNIQUE_OACC_FORK)) - remove = true; - break; - - case IFN_UNIQUE_OACC_HEAD_MARK: - case IFN_UNIQUE_OACC_TAIL_MARK: - remove = true; - break; - } - break; - } - } - - if (gsi_end_p (gsi)) - /* We rewound past the beginning of the BB. */ - gsi = gsi_start_bb (bb); - else - /* Undo the rewind. */ - gsi_next (&gsi); - - if (remove) - { - if (gimple_vdef (call)) - replace_uses_by (gimple_vdef (call), gimple_vuse (call)); - if (gimple_call_lhs (call)) - { - /* Propagate the data dependency var. */ - gimple *ass = gimple_build_assign (gimple_call_lhs (call), - gimple_call_arg (call, 1)); - gsi_replace (&gsi, ass, false); - } - else - gsi_remove (&gsi, true); - } - else if (!rescan) - /* If not rescanning, advance over the call. */ - gsi_next (&gsi); - } - - free_oacc_loop (loops); - - return 0; -} - -/* Default launch dimension validator. Force everything to 1. A - backend that wants to provide larger dimensions must override this - hook. */ - -bool -default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims, - int ARG_UNUSED (fn_level)) -{ - bool changed = false; - - for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++) - { - if (dims[ix] != 1) - { - dims[ix] = 1; - changed = true; - } - } - - return changed; -} - -/* Default dimension bound is unknown on accelerator and 1 on host. */ - -int -default_goacc_dim_limit (int ARG_UNUSED (axis)) -{ -#ifdef ACCEL_COMPILER - return 0; -#else - return 1; -#endif -} - -namespace { - -const pass_data pass_data_oacc_device_lower = -{ - GIMPLE_PASS, /* type */ - "oaccdevlow", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg, /* properties_required */ - 0 /* Possibly PROP_gimple_eomp. */, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */ -}; - -class pass_oacc_device_lower : public gimple_opt_pass -{ -public: - pass_oacc_device_lower (gcc::context *ctxt) - : gimple_opt_pass (pass_data_oacc_device_lower, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *) { return flag_openacc; }; - - virtual unsigned int execute (function *) - { - return execute_oacc_device_lower (); - } - -}; // class pass_oacc_device_lower - -} // anon namespace - -gimple_opt_pass * -make_pass_oacc_device_lower (gcc::context *ctxt) -{ - return new pass_oacc_device_lower (ctxt); -} - - -/* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets, - VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and - LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT - internal functions on non-SIMT targets, and likewise some SIMD internal - functions on SIMT targets. */ - -static unsigned int -execute_omp_device_lower () -{ - int vf = targetm.simt.vf ? targetm.simt.vf () : 1; - basic_block bb; - gimple_stmt_iterator gsi; - FOR_EACH_BB_FN (bb, cfun) - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) - continue; - tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE; - tree type = lhs ? TREE_TYPE (lhs) : integer_type_node; - switch (gimple_call_internal_fn (stmt)) - { - case IFN_GOMP_USE_SIMT: - rhs = vf == 1 ? integer_zero_node : integer_one_node; - break; - case IFN_GOMP_SIMT_LANE: - case IFN_GOMP_SIMT_LAST_LANE: - rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE; - break; - case IFN_GOMP_SIMT_VF: - rhs = build_int_cst (type, vf); - break; - case IFN_GOMP_SIMT_ORDERED_PRED: - rhs = vf == 1 ? integer_zero_node : NULL_TREE; - if (rhs || !lhs) - unlink_stmt_vdef (stmt); - break; - case IFN_GOMP_SIMT_VOTE_ANY: - case IFN_GOMP_SIMT_XCHG_BFLY: - case IFN_GOMP_SIMT_XCHG_IDX: - rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE; - break; - case IFN_GOMP_SIMD_LANE: - case IFN_GOMP_SIMD_LAST_LANE: - rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE; - break; - case IFN_GOMP_SIMD_VF: - rhs = vf != 1 ? build_one_cst (type) : NULL_TREE; - break; - default: - continue; - } - if (lhs && !rhs) - continue; - stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop (); - gsi_replace (&gsi, stmt, false); - } - if (vf != 1) - cfun->has_force_vectorize_loops = false; - return 0; -} - -namespace { - -const pass_data pass_data_omp_device_lower = -{ - GIMPLE_PASS, /* type */ - "ompdevlow", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg, /* properties_required */ - PROP_gimple_lomp_dev, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa, /* todo_flags_finish */ -}; - -class pass_omp_device_lower : public gimple_opt_pass -{ -public: - pass_omp_device_lower (gcc::context *ctxt) - : gimple_opt_pass (pass_data_omp_device_lower, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *ARG_UNUSED (fun)) - { - /* FIXME: this should use PROP_gimple_lomp_dev. */ -#ifdef ACCEL_COMPILER - return true; -#else - return ENABLE_OFFLOADING && (flag_openmp || in_lto_p); -#endif - } - virtual unsigned int execute (function *) - { - return execute_omp_device_lower (); - } - -}; // class pass_expand_omp_ssa - -} // anon namespace - -gimple_opt_pass * -make_pass_omp_device_lower (gcc::context *ctxt) -{ - return new pass_omp_device_lower (ctxt); -} - -/* "omp declare target link" handling pass. */ - -namespace { - -const pass_data pass_data_omp_target_link = -{ - GIMPLE_PASS, /* type */ - "omptargetlink", /* name */ - OPTGROUP_OPENMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_update_ssa, /* todo_flags_finish */ -}; - -class pass_omp_target_link : public gimple_opt_pass -{ -public: - pass_omp_target_link (gcc::context *ctxt) - : gimple_opt_pass (pass_data_omp_target_link, ctxt) - {} - - /* opt_pass methods: */ - virtual bool gate (function *fun) - { -#ifdef ACCEL_COMPILER - tree attrs = DECL_ATTRIBUTES (fun->decl); - return lookup_attribute ("omp declare target", attrs) - || lookup_attribute ("omp target entrypoint", attrs); -#else - (void) fun; - return false; -#endif - } - - virtual unsigned execute (function *); -}; - -/* Callback for walk_gimple_stmt used to scan for link var operands. */ - -static tree -find_link_var_op (tree *tp, int *walk_subtrees, void *) -{ - tree t = *tp; - - if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t) - && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t))) - { - *walk_subtrees = 0; - return t; - } - - return NULL_TREE; -} - -unsigned -pass_omp_target_link::execute (function *fun) -{ - basic_block bb; - FOR_EACH_BB_FN (bb, fun) - { - gimple_stmt_iterator gsi; - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL)) - gimple_regimplify_operands (gsi_stmt (gsi), &gsi); - } - - return 0; -} - -} // anon namespace - -gimple_opt_pass * -make_pass_omp_target_link (gcc::context *ctxt) -{ - return new pass_omp_target_link (ctxt); -} #include "gt-omp-low.h" |