diff options
Diffstat (limited to 'gcc/omp-expand.c')
-rw-r--r-- | gcc/omp-expand.c | 142 |
1 files changed, 90 insertions, 52 deletions
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index 6583c88..4baec32 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -1510,8 +1510,8 @@ struct oacc_collapse static tree expand_oacc_collapse_init (const struct omp_for_data *fd, gimple_stmt_iterator *gsi, - oacc_collapse *counts, tree bound_type, - location_t loc) + oacc_collapse *counts, tree diff_type, + tree bound_type, location_t loc) { tree tiling = fd->tiling; tree total = build_int_cst (bound_type, 1); @@ -1528,17 +1528,12 @@ expand_oacc_collapse_init (const struct omp_for_data *fd, const omp_for_data_loop *loop = &fd->loops[ix]; tree iter_type = TREE_TYPE (loop->v); - tree diff_type = iter_type; tree plus_type = iter_type; gcc_assert (loop->cond_code == fd->loop.cond_code); if (POINTER_TYPE_P (iter_type)) plus_type = sizetype; - if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) - diff_type = signed_type_for (diff_type); - if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) - diff_type = integer_type_node; if (tiling) { @@ -1626,7 +1621,8 @@ expand_oacc_collapse_init (const struct omp_for_data *fd, static void expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, gimple_stmt_iterator *gsi, - const oacc_collapse *counts, tree ivar) + const oacc_collapse *counts, tree ivar, + tree diff_type) { tree ivar_type = TREE_TYPE (ivar); @@ -1638,7 +1634,6 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, const oacc_collapse *collapse = &counts[ix]; tree v = inner ? loop->v : collapse->outer; tree iter_type = TREE_TYPE (v); - tree diff_type = TREE_TYPE (collapse->step); tree plus_type = iter_type; enum tree_code plus_code = PLUS_EXPR; tree expr; @@ -1660,7 +1655,7 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, } expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), - collapse->step); + fold_convert (diff_type, collapse->step)); expr = fold_build2 (plus_code, iter_type, inner ? collapse->outer : collapse->base, fold_convert (plus_type, expr)); @@ -2514,7 +2509,8 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, && (TREE_CODE (fd->loop.n2) == INTEGER_CST || fd->first_inner_iterations) && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node)) - != CODE_FOR_nothing)) + != CODE_FOR_nothing) + && !integer_zerop (fd->loop.n2)) { tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1; tree itype = TREE_TYPE (fd->loops[i].v); @@ -4255,8 +4251,7 @@ expand_omp_for_generic (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); @@ -4309,13 +4304,18 @@ expand_omp_for_generic (struct omp_region *region, gsi = gsi_last_bb (l0_bb); expand_omp_build_assign (&gsi, counts[fd->collapse - 1], istart0, true); - gsi = gsi_last_bb (cont_bb); - t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1], - build_int_cst (fd->iter_type, 1)); - expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); - tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered], - size_zero_node, NULL_TREE, NULL_TREE); - expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); + if (cont_bb) + { + gsi = gsi_last_bb (cont_bb); + t = fold_build2 (PLUS_EXPR, fd->iter_type, + counts[fd->collapse - 1], + build_int_cst (fd->iter_type, 1)); + expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t); + tree aref = build4 (ARRAY_REF, fd->iter_type, + counts[fd->ordered], size_zero_node, + NULL_TREE, NULL_TREE); + expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]); + } t = counts[fd->collapse - 1]; } else if (fd->collapse > 1) @@ -5250,8 +5250,7 @@ expand_omp_for_static_nochunk (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) { @@ -5974,8 +5973,7 @@ expand_omp_for_static_chunk (struct omp_region *region, : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a); t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - assign_stmt = gimple_build_assign (dest, t); - gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING); + expand_omp_build_assign (&gsi, dest, t, true); } if (fd->collapse > 1) expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar); @@ -7415,6 +7413,21 @@ expand_omp_taskloop_for_inner (struct omp_region *region, static void expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) { + bool is_oacc_kernels_parallelized + = (lookup_attribute ("oacc kernels parallelized", + DECL_ATTRIBUTES (current_function_decl)) != NULL); + { + bool is_oacc_kernels + = (lookup_attribute ("oacc kernels", + DECL_ATTRIBUTES (current_function_decl)) != NULL); + if (is_oacc_kernels_parallelized) + gcc_checking_assert (is_oacc_kernels); + } + gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized); + /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are + for SSA specifics, and some are for 'parloops' OpenACC + 'kernels'-parallelized specifics. */ + tree v = fd->loop.v; enum tree_code cond_code = fd->loop.cond_code; enum tree_code plus_code = PLUS_EXPR; @@ -7436,6 +7449,12 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) plus_code = POINTER_PLUS_EXPR; plus_type = sizetype; } + for (int ix = fd->collapse; ix--;) + { + tree diff_type2 = TREE_TYPE (fd->loops[ix].step); + if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2)) + diff_type = diff_type2; + } if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type)) diff_type = signed_type_for (diff_type); if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node)) @@ -7519,7 +7538,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) { gcc_assert (!gimple_in_ssa_p (cfun) && up); counts = XALLOCAVEC (struct oacc_collapse, fd->collapse); - tree total = expand_oacc_collapse_init (fd, &gsi, counts, + tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type, TREE_TYPE (fd->loop.n2), loc); if (SSA_VAR_P (fd->loop.n2)) @@ -7681,7 +7700,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) gsi_insert_before (&gsi, ass, GSI_SAME_STMT); if (fd->collapse > 1 || fd->tiling) - expand_oacc_collapse_vars (fd, false, &gsi, counts, v); + expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type); if (fd->tiling) { @@ -7751,7 +7770,8 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd) /* Initialize the user's loop vars. */ gsi = gsi_start_bb (elem_body_bb); - expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset); + expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset, + diff_type); } } @@ -9240,7 +9260,7 @@ expand_omp_target (struct omp_region *region) gomp_target *entry_stmt; gimple *stmt; edge e; - bool offloaded, data_region; + bool offloaded; int target_kind; entry_stmt = as_a <gomp_target *> (last_stmt (region->entry)); @@ -9260,12 +9280,12 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: - data_region = false; - break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE: case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_DATA: case GF_OMP_TARGET_KIND_OACC_HOST_DATA: - data_region = true; + case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS: break; default: gcc_unreachable (); @@ -9287,27 +9307,43 @@ expand_omp_target (struct omp_region *region) entry_bb = region->entry; exit_bb = region->exit; + if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS) + mark_loops_in_oacc_kernels_region (region->entry, region->exit); + + /* Going on, all OpenACC compute constructs are mapped to + 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined. + To distinguish between them, we attach attributes. */ switch (target_kind) { + case GF_OMP_TARGET_KIND_OACC_PARALLEL: + DECL_ATTRIBUTES (child_fn) + = tree_cons (get_identifier ("oacc parallel"), + NULL_TREE, DECL_ATTRIBUTES (child_fn)); + break; case GF_OMP_TARGET_KIND_OACC_KERNELS: - mark_loops_in_oacc_kernels_region (region->entry, region->exit); - - /* Further down, all OpenACC compute constructs will be mapped to - BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there - is an "oacc kernels" attribute set for OpenACC kernels. */ DECL_ATTRIBUTES (child_fn) = tree_cons (get_identifier ("oacc kernels"), NULL_TREE, DECL_ATTRIBUTES (child_fn)); break; case GF_OMP_TARGET_KIND_OACC_SERIAL: - /* Further down, all OpenACC compute constructs will be mapped to - BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there - is an "oacc serial" attribute set for OpenACC serial. */ DECL_ATTRIBUTES (child_fn) = tree_cons (get_identifier ("oacc serial"), NULL_TREE, DECL_ATTRIBUTES (child_fn)); break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED: + DECL_ATTRIBUTES (child_fn) + = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"), + NULL_TREE, DECL_ATTRIBUTES (child_fn)); + break; + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE: + DECL_ATTRIBUTES (child_fn) + = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"), + NULL_TREE, DECL_ATTRIBUTES (child_fn)); + break; default: + /* Make sure we don't miss any. */ + gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt) + && is_gimple_omp_offloaded (entry_stmt))); break; } @@ -9514,10 +9550,13 @@ expand_omp_target (struct omp_region *region) case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_SERIAL: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE: start_ix = BUILT_IN_GOACC_PARALLEL; break; case GF_OMP_TARGET_KIND_OACC_DATA: case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS: start_ix = BUILT_IN_GOACC_DATA_START; break; case GF_OMP_TARGET_KIND_OACC_UPDATE: @@ -9813,13 +9852,6 @@ expand_omp_target (struct omp_region *region) gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET); gsi_remove (&gsi, true); } - if (data_region && region->exit) - { - gsi = gsi_last_nondebug_bb (region->exit); - g = gsi_stmt (gsi); - gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN); - gsi_remove (&gsi, true); - } } /* Expand the parallel region tree rooted at REGION. Expansion @@ -9984,16 +10016,19 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent, switch (gimple_omp_target_kind (stmt)) { case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_SERIAL: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS: case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: @@ -10238,16 +10273,19 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region, switch (gimple_omp_target_kind (last)) { case GF_OMP_TARGET_KIND_REGION: - case GF_OMP_TARGET_KIND_DATA: case GF_OMP_TARGET_KIND_OACC_PARALLEL: case GF_OMP_TARGET_KIND_OACC_KERNELS: case GF_OMP_TARGET_KIND_OACC_SERIAL: - case GF_OMP_TARGET_KIND_OACC_DATA: - case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED: + case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE: break; case GF_OMP_TARGET_KIND_UPDATE: case GF_OMP_TARGET_KIND_ENTER_DATA: case GF_OMP_TARGET_KIND_EXIT_DATA: + case GF_OMP_TARGET_KIND_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA: + case GF_OMP_TARGET_KIND_OACC_HOST_DATA: + case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS: case GF_OMP_TARGET_KIND_OACC_UPDATE: case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA: case GF_OMP_TARGET_KIND_OACC_DECLARE: |