From 629b3d75c8c5a244d891a9c292bca6912d4b0dd9 Mon Sep 17 00:00:00 2001 From: Martin Jambor <mjambor@suse.cz> Date: Wed, 14 Dec 2016 23:30:41 +0100 Subject: Split omp-low into multiple files 2016-12-14 Martin Jambor <mjambor@suse.cz> * omp-general.h: New file. * omp-general.c: New file. * omp-expand.h: Likewise. * omp-expand.c: Likewise. * omp-offload.h: Likewise. * omp-offload.c: Likewise. * omp-grid.h: Likewise. * omp-grid.c: Likewise. * omp-low.h: Include omp-general.h and omp-grid.h. Removed includes of params.h, symbol-summary.h, lto-section-names.h, cilk.h, tree-eh.h, ipa-prop.h, tree-cfgcleanup.h, cfgloop.h, except.h, expr.h, stmt.h, varasm.h, calls.h, explow.h, dojump.h, flags.h, tree-into-ssa.h, tree-cfg.h, cfganal.h, alias.h, emit-rtl.h, optabs.h, expmed.h, alloc-pool.h, cfghooks.h, rtl.h and memmodel.h. (omp_find_combined_for): Declare. (find_omp_clause): Renamed to omp_find_clause and moved to omp-general.h. (free_omp_regions): Renamed to omp_free_regions and moved to omp-expand.h. (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved to omp-general.h. (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to omp-general.h. (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and moved to omp-general.h. (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to omp-general.h. (oacc_fn_attrib_kernels_p): Moved to omp-general.h. (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to omp-general.c. (omp_expand_local): Moved to omp-expand.h. (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to omp-expand.h. (omp_finish_file): Moved to omp-offload.h. (default_goacc_validate_dims): Renamed to oacc_default_goacc_validate_dims and moved to omp-offload.h. (offload_funcs, offload_vars): Moved to omp-offload.h. * omp-low.c: Include omp-general.h, omp-offload.h and omp-grid.h. (omp_region): Moved to omp-expand.c. (omp_for_data_loop): Moved to omp-general.h. (omp_for_data): Likewise. (oacc_loop): Moved to omp-offload.c. (oacc_loop_flags): Moved to omp-general.h. (offload_funcs, offload_vars): Moved to omp-offload.c. (root_omp_region): Moved to omp-expand.c. (omp_any_child_fn_dumped): Likewise. (find_omp_clause): Renamed to omp_find_clause and moved to omp-general.c. (is_combined_parallel): Moved to omp-expand.c. (is_reference): Renamed to omp_is_reference and moved to omp-general.c. (adjust_for_condition): Renamed to omp_adjust_for_condition and moved to omp-general.c. (get_omp_for_step_from_incr): Renamed to omp_get_for_step_from_incr and moved to omp-general.c. (extract_omp_for_data): Renamed to omp_extract_for_data and moved to omp-general.c. (workshare_safe_to_combine_p): Moved to omp-expand.c. (omp_adjust_chunk_size): Likewise. (get_ws_args_for): Likewise. (get_base_type): Removed. (dump_omp_region): Moved to omp-expand.c. (debug_omp_region): Likewise. (debug_all_omp_regions): Likewise. (new_omp_region): Likewise. (free_omp_region_1): Likewise. (free_omp_regions): Renamed to omp_free_regions and moved to omp-expand.c. (find_combined_for): Renamed to omp_find_combined_for, made global. (build_omp_barrier): Renamed to omp_build_barrier and moved to omp-general.c. (omp_max_vf): Moved to omp-general.c. (omp_max_simt_vf): Likewise. (gimple_build_cond_empty): Moved to omp-expand.c. (parallel_needs_hsa_kernel_p): Likewise. (expand_omp_build_assign): Moved declaration to omp-expand.c. (expand_parallel_call): Moved to omp-expand.c. (expand_cilk_for_call): Likewise. (expand_task_call): Likewise. (vec2chain): Likewise. (remove_exit_barrier): Likewise. 
(remove_exit_barriers): Likewise. (optimize_omp_library_calls): Likewise. (expand_omp_regimplify_p): Likewise. (expand_omp_build_assign): Likewise. (expand_omp_taskreg): Likewise. (oacc_collapse): Likewise. (expand_oacc_collapse_init): Likewise. (expand_oacc_collapse_vars): Likewise. (expand_omp_for_init_counts): Likewise. (expand_omp_for_init_vars): Likewise. (extract_omp_for_update_vars): Likewise. (expand_omp_ordered_source): Likewise. (expand_omp_ordered_sink): Likewise. (expand_omp_ordered_source_sink): Likewise. (expand_omp_for_ordered_loops): Likewise. (expand_omp_for_generic): Likewise. (expand_omp_for_static_nochunk): Likewise. (find_phi_with_arg_on_edge): Likewise. (expand_omp_for_static_chunk): Likewise. (expand_cilk_for): Likewise. (expand_omp_simd): Likewise. (expand_omp_taskloop_for_outer): Likewise. (expand_omp_taskloop_for_inner): Likewise. (expand_oacc_for): Likewise. (expand_omp_for): Likewise. (expand_omp_sections): Likewise. (expand_omp_single): Likewise. (expand_omp_synch): Likewise. (expand_omp_atomic_load): Likewise. (expand_omp_atomic_store): Likewise. (expand_omp_atomic_fetch_op): Likewise. (expand_omp_atomic_pipeline): Likewise. (expand_omp_atomic_mutex): Likewise. (expand_omp_atomic): Likewise. (oacc_launch_pack): Moved to omp-general.c, made public. (OACC_FN_ATTRIB): Likewise. (replace_oacc_fn_attrib): Renamed to oacc_replace_fn_attrib and moved to omp-general.c. (set_oacc_fn_attrib): Renamed to oacc_set_fn_attrib and moved to omp-general.c. (build_oacc_routine_dims): Renamed to oacc_build_routine_dims and moved to omp-general.c. (get_oacc_fn_attrib): Renamed to oacc_get_fn_attrib and moved to omp-general.c. (oacc_fn_attrib_kernels_p): Moved to omp-general.c. (oacc_fn_attrib_level): Moved to omp-offload.c. (get_oacc_fn_dim_size): Renamed to oacc_get_fn_dim_size and moved to omp-general.c. (get_oacc_ifn_dim_arg): Renamed to oacc_get_ifn_dim_arg and moved to omp-general.c. (mark_loops_in_oacc_kernels_region): Moved to omp-expand.c. (grid_launch_attributes_trees): Likewise. (grid_attr_trees): Likewise. (grid_create_kernel_launch_attr_types): Likewise. (grid_insert_store_range_dim): Likewise. (grid_get_kernel_launch_attributes): Likewise. (get_target_argument_identifier_1): Likewise. (get_target_argument_identifier): Likewise. (get_target_argument_value): Likewise. (push_target_argument_according_to_value): Likewise. (get_target_arguments): Likewise. (expand_omp_target): Likewise. (grid_expand_omp_for_loop): Moved to omp-grid.c. (grid_arg_decl_map): Likewise. (grid_remap_kernel_arg_accesses): Likewise. (grid_expand_target_grid_body): Likewise. (expand_omp): Renamed to omp_expand and moved to omp-expand.c. (build_omp_regions_1): Moved to omp-expand.c. (build_omp_regions_root): Likewise. (omp_expand_local): Likewise. (build_omp_regions): Likewise. (execute_expand_omp): Likewise. (pass_data_expand_omp): Likewise. (pass_expand_omp): Likewise. (make_pass_expand_omp): Likewise. (pass_data_expand_omp_ssa): Likewise. (pass_expand_omp_ssa): Likewise. (make_pass_expand_omp_ssa): Likewise. (grid_lastprivate_predicate): Renamed to omp_grid_lastprivate_predicate and moved to omp-grid.c, made public. (grid_prop): Moved to omp-grid.c. (GRID_MISSED_MSG_PREFIX): Likewise. (grid_safe_assignment_p): Likewise. (grid_seq_only_contains_local_assignments): Likewise. (grid_find_single_omp_among_assignments_1): Likewise. (grid_find_single_omp_among_assignments): Likewise. (grid_find_ungridifiable_statement): Likewise. (grid_parallel_clauses_gridifiable): Likewise. 
(grid_inner_loop_gridifiable_p): Likewise. (grid_dist_follows_simple_pattern): Likewise. (grid_gfor_follows_tiling_pattern): Likewise. (grid_call_permissible_in_distribute_p): Likewise. (grid_handle_call_in_distribute): Likewise. (grid_dist_follows_tiling_pattern): Likewise. (grid_target_follows_gridifiable_pattern): Likewise. (grid_remap_prebody_decls): Likewise. (grid_var_segment): Likewise. (grid_mark_variable_segment): Likewise. (grid_copy_leading_local_assignments): Likewise. (grid_process_grid_body): Likewise. (grid_eliminate_combined_simd_part): Likewise. (grid_mark_tiling_loops): Likewise. (grid_mark_tiling_parallels_and_loops): Likewise. (grid_process_kernel_body_copy): Likewise. (grid_attempt_target_gridification): Likewise. (grid_gridify_all_targets_stmt): Likewise. (grid_gridify_all_targets): Renamed to omp_grid_gridify_all_targets and moved to omp-grid.c, made public. (make_gimple_omp_edges): Renamed to omp_make_gimple_edges and moved to omp-expand.c. (add_decls_addresses_to_decl_constructor): Moved to omp-offload.c. (omp_finish_file): Likewise. (oacc_thread_numbers): Likewise. (oacc_xform_loop): Likewise. (oacc_default_dims, oacc_min_dims): Likewise. (oacc_parse_default_dims): Likewise. (oacc_validate_dims): Likewise. (new_oacc_loop_raw): Likewise. (new_oacc_loop_outer): Likewise. (new_oacc_loop): Likewise. (new_oacc_loop_routine): Likewise. (finish_oacc_loop): Likewise. (free_oacc_loop): Likewise. (dump_oacc_loop_part): Likewise. (dump_oacc_loop): Likewise. (debug_oacc_loop): Likewise. (oacc_loop_discover_walk): Likewise. (oacc_loop_sibling_nreverse): Likewise. (oacc_loop_discovery): Likewise. (oacc_loop_xform_head_tail): Likewise. (oacc_loop_xform_loop): Likewise. (oacc_loop_process): Likewise. (oacc_loop_fixed_partitions): Likewise. (oacc_loop_auto_partitions): Likewise. (oacc_loop_partition): Likewise. (default_goacc_fork_join): Likewise. (default_goacc_reduction): Likewise. (execute_oacc_device_lower): Likewise. (default_goacc_validate_dims): Likewise. (default_goacc_dim_limit): Likewise. (pass_data_oacc_device_lower): Likewise. (pass_oacc_device_lower): Likewise. (make_pass_oacc_device_lower): Likewise. (execute_omp_device_lower): Likewise. (pass_data_omp_device_lower): Likewise. (pass_omp_device_lower): Likewise. (make_pass_omp_device_lower): Likewise. (pass_data_omp_target_link): Likewise. (pass_omp_target_link): Likewise. (find_link_var_op): Likewise. (pass_omp_target_link::execute): Likewise. (make_pass_omp_target_link): Likewise. * Makefile.in (OBJS): Added omp-offload.o, omp-expand.o, omp-general.o and omp-grid.o. (GTFILES): Added omp-offload.h, omp-offload.c and omp-expand.c, removed omp-low.h. * gimple-fold.c: Include omp-general.h instead of omp-low.h. (fold_internal_goacc_dim): Adjusted calls to get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names. * gimplify.c: Include omp-low.h. (omp_notice_variable): Adjusted the call to get_oacc_fn_attrib to use its new name. (gimplify_omp_task): Adjusted calls to find_omp_clause to use its new name. (gimplify_omp_for): Likewise. * lto-cgraph.c: Include omp-offload.h instead of omp-low.h. * toplev.c: Include omp-offload.h instead of omp-low.h. * tree-cfg.c: Include omp-general.h instead of omp-low.h. Also include omp-expand.h. (make_edges_bb): Adjusted the call to make_gimple_omp_edges to use its new name. (make_edges): Adjusted the call to free_omp_regions to use its new name. * tree-parloops.c: Include omp-general.h. (create_parallel_loop): Adjusted the call to set_oacc_fn_attrib to use its new name. 
(parallelize_loops): Adjusted the call to get_oacc_fn_attrib to use its new name. * tree-ssa-loop.c: Include omp-general.h instead of omp-low.h. (gate_oacc_kernels): Adjusted the call to get_oacc_fn_attrib to use its new name. * tree-vrp.c: Include omp-general.h instead of omp-low.h. (extract_range_basic): Adjusted calls to get_oacc_ifn_dim_arg and get_oacc_fn_dim_size to use their new names. * varpool.c: Include omp-offload.h instead of omp-low.h. * gengtype.c (open_base_files): Replace omp-low.h with omp-offload.h in ifiles. * config/nvptx/nvptx.c: Include omp-general.h. (nvptx_expand_call): Adjusted the call to get_oacc_fn_attrib to use its new name. (nvptx_reorg): Likewise. (nvptx_record_offload_symbol): Likewise. gcc/c-family: * c-omp.c: Include omp-general.h instead of omp-low.h. (c_finish_oacc_wait): Adjusted call to find_omp_clause to use its new name. gcc/c/ * c-parser.c: Include omp-general.h and omp-offload.h instead of omp-low.h. (c_finish_oacc_routine): Adjusted call to get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib to use their new names. (c_parser_oacc_enter_exit_data): Adjusted call to find_omp_clause to use its new name. (c_parser_oacc_update): Likewise. (c_parser_omp_simd): Likewise. (c_parser_omp_target_update): Likewise. * c-typeck.c: Include omp-general.h instead of omp-low.h. (c_finish_omp_cancel): Adjusted call to find_omp_clause to use its new name. (c_finish_omp_cancellation_point): Likewise. * gimple-parser.c: Do not include omp-low.h. gcc/cp/ * parser.c: Include omp-general.h and omp-offload.h instead of omp-low.h. (cp_parser_omp_simd): Adjusted calls to find_omp_clause to use its new name. (cp_parser_omp_target_update): Likewise. (cp_parser_oacc_declare): Likewise. (cp_parser_oacc_enter_exit_data): Likewise. (cp_parser_oacc_update): Likewise. (cp_finalize_oacc_routine): Adjusted call to get_oacc_fn_attrib, build_oacc_routine_dims and replace_oacc_fn_attrib to use their new names. * semantics.c: Include omp-general.h instead of omp-low.h. (finish_omp_for): Adjusted calls to find_omp_clause to use its new name. (finish_omp_cancel): Likewise. (finish_omp_cancellation_point): Likewise. fortran/ * trans-openmp.c: Include omp-general.h. From-SVN: r243673 --- gcc/omp-grid.c | 1407 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1407 insertions(+) create mode 100644 gcc/omp-grid.c diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c new file mode 100644 index 0000000..81f6ea5 --- /dev/null +++ b/gcc/omp-grid.c @@ -0,0 +1,1407 @@ +/* Lowering and expansion of OpenMP directives for HSA GPU agents. + + Copyright (C) 2013-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. 
*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "gimple.h" +#include "tree-pass.h" +#include "ssa.h" +#include "cgraph.h" +#include "pretty-print.h" +#include "fold-const.h" +#include "gimplify.h" +#include "gimple-iterator.h" +#include "gimple-walk.h" +#include "tree-inline.h" +#include "langhooks.h" +#include "omp-general.h" +#include "omp-low.h" +#include "omp-grid.h" +#include "gimple-pretty-print.h" + +/* Return the lastprivate predicate for a given gridified loop described by + FD. */ + +tree +omp_grid_lastprivate_predicate (struct omp_for_data *fd) +{ + /* When dealing with a gridified loop, we need to check up to three collapsed + iteration variables but they are not actually captured in FD. + Fortunately, we can easily rely on HSA builtins to get this + information. */ + + tree id, size; + if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP + && gimple_omp_for_grid_intra_group (fd->for_stmt)) + { + id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); + size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); + } + else + { + id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); + size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); + } + tree cond = NULL; + for (int dim = 0; dim < fd->collapse; dim++) + { + tree dim_tree = build_int_cstu (unsigned_type_node, dim); + tree u1 = build_int_cstu (unsigned_type_node, 1); + tree c2 + = build2 (EQ_EXPR, boolean_type_node, + build2 (PLUS_EXPR, unsigned_type_node, + build_call_expr (id, 1, dim_tree), u1), + build_call_expr (size, 1, dim_tree)); + if (cond) + cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); + else + cond = c2; + } + return cond; +} + +/* Structure describing the basic properties of the loop we are analyzing, + both when checking whether it can be gridified and when it is being + gridified. */ + +struct grid_prop +{ + /* True when we are doing tiling gridification, i.e. when there is a distinct + distribute loop over groups and a loop construct over work-items. False + when distribute and parallel for loops form a combined construct. */ + bool tiling; + /* Location of the target construct for optimization information + messages. */ + location_t target_loc; + /* The collapse clause of the involved loops. Collapse value of all of them + must be the same for gridification to take place. */ + size_t collapse; + /* Group sizes, if requested by the user, or NULL if not requested. */ + tree group_sizes[3]; +}; + +#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ + "gridified HSA kernel because " + +/* Return true if STMT is an assignment of a register-type value into a local + VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to + any of the trees specifying group sizes there. */ + +static bool +grid_safe_assignment_p (gimple *stmt, grid_prop *grid) +{ + gassign *assign = dyn_cast <gassign *> (stmt); + if (!assign) + return false; + if (gimple_clobber_p (assign)) + return true; + tree lhs = gimple_assign_lhs (assign); + if (!VAR_P (lhs) + || !is_gimple_reg_type (TREE_TYPE (lhs)) + || is_global_var (lhs)) + return false; + if (grid) + for (unsigned i = 0; i < grid->collapse; i++) + if (lhs == grid->group_sizes[i]) + return false; + return true; +} + +/* Return true if all statements in SEQ are assignments to local register-type + variables that do not hold group size information. 
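As an illustration (the variable names here are arbitrary), a sequence such as + D.1234 = n * 4; + i = D.1234; + satisfies this predicate, whereas any call, any store to memory and any + assignment to one of the trees in GRID->group_sizes does not. 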
*/ + +static bool +grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) +{ + if (!seq) + return true; + + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) + return false; + return true; +} + +/* Scan statements in SEQ and call itself recursively on any bind. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. If during the whole search only assignments to + register-type local variables (that do not overwrite group size information) + and a single OMP statement are encountered, return true, otherwise return + false. NAME is used in dump messages to identify the enclosing construct. + RET is where we store any OMP statement encountered. */ + +static bool +grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, + const char *name, gimple **ret) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (grid_safe_assignment_p (stmt, grid)) + continue; + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind), + grid, name, ret)) + return false; + } + else if (is_gimple_omp (stmt)) + { + if (*ret) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct " + "contains multiple OpenMP constructs\n", + name); + dump_printf_loc (MSG_NOTE, gimple_location (*ret), + "The first OpenMP construct within " + "a parallel\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "The second OpenMP construct within " + "a parallel\n"); + } + return false; + } + *ret = stmt; + } + else + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct contains " + "a complex statement\n", name); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "gridification\n"); + } + return false; + } + } + return true; +} + +/* Scan statements in SEQ and make sure that it and any binds in it contain + only assignments to local register-type variables (that do not overwrite + group size information) and one OMP construct. If so, return that + construct, otherwise return NULL. GRID describes hitherto discovered + properties of the loop that is evaluated for possible gridification. If + dumping is enabled and function fails, use NAME to dump a note with the + reason for failure. */ + +static gimple * +grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, + const char *name) +{ + if (!seq) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", + name); + return NULL; + } + + gimple *ret = NULL; + if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) + { + if (!ret && dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "%s construct does not contain " + "any other OpenMP construct\n", name); + return ret; + } + else + return NULL; +} + +/* Walker function looking for statements for which there is no point in + gridifying (and for noreturn function calls, which we cannot handle). + Return non-NULL if such a statement is found. 
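Returning error_mark_node terminates the walk immediately; the offending + statement itself is passed back to the caller through WI->info. 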
*/ + +static tree +grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *wi) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) + { + *handled_ops_p = true; + wi->info = stmt; + return error_mark_node; + } + break; + + /* We may reduce the following list if we find a way to implement the + clauses, but for now there is no point in trying further. */ + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_TASK: + case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_SECTIONS: + case GIMPLE_OMP_SECTIONS_SWITCH: + case GIMPLE_OMP_TARGET: + case GIMPLE_OMP_ORDERED: + *handled_ops_p = true; + wi->info = stmt; + return error_mark_node; + default: + break; + } + return NULL; +} + +/* Examine clauses of omp parallel statement PAR and if any prevents + gridification, issue a missed-optimization diagnostic and return false, + otherwise return true. TLOC is the location of the target construct, used + for dump messages. */ + +static bool +grid_parallel_clauses_gridifiable (gomp_parallel *par, location_t tloc) +{ + tree clauses = gimple_omp_parallel_clauses (par); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_THREADS: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "there is " + "a num_threads clause of the parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (par), + "Parallel construct has a num_threads clause\n"); + } + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "a reduction clause " + "is present\n"); + dump_printf_loc (MSG_NOTE, gimple_location (par), + "Parallel construct has a reduction clause\n"); + } + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + return true; +} + +/* Examine clauses and the body of omp loop statement GFOR and if something + prevents gridification, issue a missed-optimization diagnostic and return + false, otherwise return true. GRID describes hitherto discovered properties + of the loop that is evaluated for possible gridification. 
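Only an auto schedule (or no schedule clause at all) and an absence of + reduction clauses are accepted; the body must additionally survive the + grid_find_ungridifiable_statement walk above. 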
*/ + +static bool +grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) +{ + if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), + grid)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop " + "bounds computation contains a complex " + "statement\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct cannot be analyzed for " + "gridification\n"); + } + return false; + } + + tree clauses = gimple_omp_for_clauses (gfor); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_SCHEDULE: + if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop " + "has a non-automatic schedule clause\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct has a non-automatic " + "schedule clause\n"); + } + return false; + } + break; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a reduction " + "clause is present\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct has a reduction " + "clause\n"); + } + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + if (walk_gimple_seq (gimple_omp_body (gfor), + grid_find_ungridifiable_statement, + NULL, &wi)) + { + gimple *bad = (gimple *) wi.info; + if (dump_enabled_p ()) + { + if (is_gimple_call (bad)) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop contains " + "call to a noreturn function\n"); + else + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the inner loop contains " + "statement %s which cannot be transformed\n", + gimple_code_name[(int) gimple_code (bad)]); + dump_printf_loc (MSG_NOTE, gimple_location (bad), + "This statement cannot be analyzed for " + "gridification\n"); + } + return false; + } + return true; +} + +/* Given a distribute omp construct represented by DIST, which in the original + source forms a combined construct with a looping construct, return true if + it can be turned into a gridified HSA kernel. Otherwise return false. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. 
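As a source-level sketch (with arbitrary names), the pattern handled here + corresponds to the combined form + #pragma omp target teams distribute parallel for + for (int i = 0; i < n; i++) + a[i] = b[i] + c[i]; + where each construct wraps only the next one plus local register-type + assignments. 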
*/ + +static bool +grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) +{ + location_t tloc = grid->target_loc; + gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), + grid, "distribute"); + gomp_parallel *par; + if (!stmt + || !(par = dyn_cast <gomp_parallel *> (stmt)) + || !grid_parallel_clauses_gridifiable (par, tloc)) + return false; + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, + "parallel"); + gomp_for *gfor; + if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) + return false; + + if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the inner loop is not " + "a simple for loop\n"); + return false; + } + gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); + + if (!grid_inner_loop_gridifiable_p (gfor, grid)) + return false; + + return true; +} + +/* Given an omp loop statement GFOR, return true if it can participate in + tiling gridification, i.e. in one where the distribute and parallel for + loops do not form a combined construct. GRID describes hitherto discovered + properties of the loop that is evaluated for possible gridification. */ + +static bool +grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) +{ + if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "an inner loop is not " + "a simple for loop\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "This statement is not a simple for loop\n"); + } + return false; + } + + if (!grid_inner_loop_gridifiable_p (gfor, grid)) + return false; + + if (gimple_omp_for_collapse (gfor) != grid->collapse) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "an inner loop does not " + "use the same collapse clause\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct uses a different collapse clause\n"); + } + return false; + } + + struct omp_for_data fd; + struct omp_for_data_loop *loops + = (struct omp_for_data_loop *)alloca (grid->collapse + * sizeof (struct omp_for_data_loop)); + omp_extract_for_data (gfor, &fd, loops); + for (unsigned i = 0; i < grid->collapse; i++) + { + tree itype, type = TREE_TYPE (fd.loops[i].v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + else + itype = type; + + tree n1 = fold_convert (itype, fd.loops[i].n1); + tree n2 = fold_convert (itype, fd.loops[i].n2); + tree t = build_int_cst (itype, + (fd.loops[i].cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); + + if (!operand_equal_p (grid->group_sizes[i], t, 0)) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute and " + "an internal loop do not agree on tile size\n"); + dump_printf_loc (MSG_NOTE, gimple_location (gfor), + "Loop construct does not seem to loop over " + "a tile size\n"); + } + return false; + } + } + return true; +} + +/* Facing a call to FNDECL in the body of a distribute construct, return true + if we can handle it or false if it precludes gridification. */ + +static bool +grid_call_permissible_in_distribute_p (tree fndecl) +{ + if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) + return true; + + const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if (strstr (name, "omp_") != name) + return false; + + if ((strcmp (name, "omp_get_thread_num") == 0) + || (strcmp (name, "omp_get_num_threads") == 0) + || (strcmp (name, "omp_get_num_teams") == 0) + || (strcmp (name, "omp_get_team_num") == 0) + || (strcmp (name, "omp_get_level") == 0) + || (strcmp (name, "omp_get_active_level") == 0) + || (strcmp (name, "omp_in_parallel") == 0)) + return true; + + return false; +} + +/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body + of a distribute construct that is pointed at by GSI, modify it as necessary + for gridification. If the statement itself got removed, return true. */ + +static bool +grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl); + if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) + return false; + + const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if ((strcmp (name, "omp_get_thread_num") == 0) + || (strcmp (name, "omp_get_level") == 0) + || (strcmp (name, "omp_get_active_level") == 0) + || (strcmp (name, "omp_in_parallel") == 0)) + { + tree lhs = gimple_call_lhs (stmt); + if (lhs) + { + gassign *assign + = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); + gsi_insert_before (gsi, assign, GSI_SAME_STMT); + } + gsi_remove (gsi, true); + return true; + } + + /* The rest of the omp functions can stay as they are, HSA back-end will + handle them correctly. */ + gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) + || (strcmp (name, "omp_get_num_teams") == 0) + || (strcmp (name, "omp_get_team_num") == 0)); + return false; +} + +/* Given a sequence of statements within a distribute omp construct or a + parallel construct, which in the original source does not form a combined + construct with a looping construct, return true if it does not prevent us + from turning it into a gridified HSA kernel. Otherwise return false. GRID + describes hitherto discovered properties of the loop that is evaluated for + possible gridification. IN_PARALLEL must be true if SEQ is within a + parallel construct and false if it is only within a distribute + construct. 
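As a sketch with arbitrary names, a tiled pattern has roughly the source + shape + #pragma omp target teams + #pragma omp distribute + for (t = 0; t < n; t += grp) + #pragma omp parallel + #pragma omp for + for (i = t; i < t + grp; i++) + a[i] = f (i); + where the distribute loop iterates over tiles and the inner loop over + work-items within one tile. 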
*/ + +static bool +grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, + bool in_parallel) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (grid_safe_assignment_p (stmt, grid) + || gimple_code (stmt) == GIMPLE_GOTO + || gimple_code (stmt) == GIMPLE_LABEL + || gimple_code (stmt) == GIMPLE_COND) + continue; + else if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), + grid, in_parallel)) + return false; + continue; + } + else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) + { + if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a try..catch region\n"); + dump_printf_loc (MSG_NOTE, gimple_location (try_stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), + grid, in_parallel)) + return false; + if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), + grid, in_parallel)) + return false; + continue; + } + else if (is_gimple_call (stmt)) + { + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) + continue; + + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a call\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) + { + if (in_parallel) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a parallel " + "construct contains another parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This parallel construct is nested in " + "another one\n"); + } + return false; + } + if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) + || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), + grid, true)) + return false; + } + else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) + { + if (!in_parallel) + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "a loop " + "construct is not nested within a parallel " + "construct\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This loop construct is not nested in " + "a parallel construct\n"); + } + return false; + } + if (!grid_gfor_follows_tiling_pattern (gfor, grid)) + return false; + } + else + { + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, + GRID_MISSED_MSG_PREFIX "the distribute " + "construct contains a complex statement\n"); + dump_printf_loc (MSG_NOTE, gimple_location (stmt), + "This statement cannot be analyzed for " + "tiled gridification\n"); + } + return false; + } + } + return true; +} + +/* If TARGET follows a pattern that can be turned into a gridified HSA kernel, + return true, otherwise return false. In the case of success, also fill in + GRID with information describing the kernel grid. 
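On success, GRID->tiling, GRID->collapse and GRID->group_sizes are all + filled in; GRID->target_loc is set early so that even the failure + diagnostics can refer to the location of the target construct. 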
*/ + +static bool +grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) +{ + if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) + return false; + + location_t tloc = gimple_location (target); + grid->target_loc = tloc; + gimple *stmt + = grid_find_single_omp_among_assignments (gimple_omp_body (target), + grid, "target"); + if (!stmt) + return false; + gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); + tree group_size = NULL; + if (!teams) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "it does not have a sole teams " + "construct in it.\n"); + return false; + } + + tree clauses = gimple_omp_teams_clauses (teams); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_TEAMS: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams construct " + "contains a num_teams clause\n"); + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "a reduction " + "clause is present\n"); + return false; + + case OMP_CLAUSE_THREAD_LIMIT: + if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) + group_size = OMP_CLAUSE_OPERAND (clauses, 0); + break; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, + "teams"); + if (!stmt) + return false; + gomp_for *dist = dyn_cast <gomp_for *> (stmt); + if (!dist) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams construct does not " + "have a single distribute construct in it.\n"); + return false; + } + + gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); + + grid->collapse = gimple_omp_for_collapse (dist); + if (grid->collapse > 3) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the distribute construct " + "contains a collapse clause with a parameter greater " + "than 3\n"); + return false; + } + + struct omp_for_data fd; + struct omp_for_data_loop *dist_loops + = (struct omp_for_data_loop *)alloca (grid->collapse + * sizeof (struct omp_for_data_loop)); + omp_extract_for_data (dist, &fd, dist_loops); + if (fd.chunk_size) + { + if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "the teams " + "thread limit is different from distribute " + "schedule chunk\n"); + return false; + } + group_size = fd.chunk_size; + } + if (group_size && grid->collapse > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "group size cannot be " + "set using thread_limit or schedule clauses " + "when also using a collapse clause greater than 1\n"); + return false; + } + + if (gimple_omp_for_combined_p (dist)) + { + grid->tiling = false; + grid->group_sizes[0] = group_size; + for (unsigned i = 1; i < grid->collapse; i++) + grid->group_sizes[i] = NULL; + return grid_dist_follows_simple_pattern (dist, grid); + } + else + { + grid->tiling = true; + if (group_size) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, + GRID_MISSED_MSG_PREFIX "group size cannot be set " + "using thread_limit or schedule clauses when " + "distribute and loop constructs do not form " + "one combined construct\n"); + return false; + } + for (unsigned i = 0; i < grid->collapse; i++) + { + 
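/* For tiling, the requested group size in each dimension is the step of + the distribute loop, negated for downward-counting (GT_EXPR) loops so + that it is positive. */ + 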
if (fd.loops[i].cond_code == GT_EXPR) + grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, + TREE_TYPE (fd.loops[i].step), + fd.loops[i].step); + else + grid->group_sizes[i] = fd.loops[i].step; + } + return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, + false); + } +} + +/* Operand walker, used to remap pre-body declarations according to a hash map + provided in DATA. */ + +static tree +grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) +{ + tree t = *tp; + + if (DECL_P (t) || TYPE_P (t)) + *walk_subtrees = 0; + else + *walk_subtrees = 1; + + if (VAR_P (t)) + { + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; + tree *repl = declmap->get (t); + if (repl) + *tp = *repl; + } + return NULL_TREE; +} + +/* Identifiers of segments into which a particular variable should be placed + when gridifying. */ + +enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, + GRID_SEGMENT_GLOBAL}; + +/* Mark VAR so that it is eventually placed into SEGMENT. This only has an + effect on addressable variables, which are also made static so that the + HSA back-end can place them into the requested segment. */ + +static void +grid_mark_variable_segment (tree var, enum grid_var_segment segment) +{ + /* Making variables non-addressable would require that we re-gimplify all + their uses. Fortunately, we do not have to do this because if they are + not addressable, it means they are not used in atomic or parallel + statements and so relaxed GPU consistency rules mean we can just keep them + private. */ + if (!TREE_ADDRESSABLE (var)) + return; + + switch (segment) + { + case GRID_SEGMENT_GROUP: + DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), + NULL, DECL_ATTRIBUTES (var)); + break; + case GRID_SEGMENT_GLOBAL: + DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), + NULL, DECL_ATTRIBUTES (var)); + break; + default: + gcc_unreachable (); + } + + if (!TREE_STATIC (var)) + { + TREE_STATIC (var) = 1; + varpool_node::finalize_decl (var); + } +} + +/* Copy leading register-type assignments to local variables in SRC to just + before DST, creating temporaries, adjusting the mapping of operands in WI + and remapping operands as necessary. Add any new temporaries to TGT_BIND. + Return the first statement that does not conform to grid_safe_assignment_p + or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all + variables in traversed bind statements so that they are put into the + appropriate segment. 
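For example (with hypothetical names), if SRC begins with the assignment + D.2345 = n; then a fresh temporary is created in TGT_BIND, the assignment + is copied to just before DST, and the map in WI is extended so that later + uses of D.2345, e.g. in group sizes, are remapped to the temporary. 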
*/ + +static gimple * +grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, + gbind *tgt_bind, + enum grid_var_segment var_segment, + struct walk_stmt_info *wi) +{ + hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; + gimple_stmt_iterator gsi; + for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + gimple *r = grid_copy_leading_local_assignments + (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); + + if (var_segment != GRID_SEGMENT_PRIVATE) + for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) + grid_mark_variable_segment (var, var_segment); + if (r) + return r; + else + continue; + } + if (!grid_safe_assignment_p (stmt, NULL)) + return stmt; + tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); + tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), + TREE_TYPE (lhs)); + DECL_CONTEXT (repl) = current_function_decl; + gimple_bind_append_vars (tgt_bind, repl); + + declmap->put (lhs, repl); + gassign *copy = as_a <gassign *> (gimple_copy (stmt)); + walk_gimple_op (copy, grid_remap_prebody_decls, wi); + gsi_insert_before (dst, copy, GSI_SAME_STMT); + } + return NULL; +} + +/* Statement walker function to make adjustments to statements within the + gridified kernel copy. */ + +static tree +grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, + struct walk_stmt_info *) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + if (gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD)) + { + gomp_for *loop = as_a <gomp_for *> (stmt); + tree clauses = gimple_omp_for_clauses (loop); + tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); + if (cl) + OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; + else + { + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); + OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; + OMP_CLAUSE_CHAIN (c) = clauses; + gimple_omp_for_set_clauses (loop, c); + } + } + return NULL_TREE; +} + +/* Given a PARLOOP that is a normal for looping construct but also a part of a + combined construct with a simd loop, eliminate the simd loop. */ + +static void +grid_eliminate_combined_simd_part (gomp_for *parloop) +{ + struct walk_stmt_info wi; + + memset (&wi, 0, sizeof (wi)); + wi.val_only = true; + enum gf_mask msk = GF_OMP_FOR_SIMD; + wi.info = (void *) &msk; + walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); + gimple *stmt = (gimple *) wi.info; + /* We expect that the SIMD is the only statement in the parallel loop. */ + gcc_assert (stmt + && gimple_code (stmt) == GIMPLE_OMP_FOR + && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD) + && gimple_omp_for_combined_into_p (stmt) + && !gimple_omp_for_combined_p (stmt)); + gomp_for *simd = as_a <gomp_for *> (stmt); + + /* Copy over the iteration properties because the body refers to the index in + the bottom-most loop. 
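(After the copy, the simd statement itself is discarded: its body becomes + the body of PARLOOP and PARLOOP is no longer marked as combined.) 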
*/ + unsigned i, collapse = gimple_omp_for_collapse (parloop); + gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); + for (i = 0; i < collapse; i++) + { + gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); + gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); + gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); + gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); + } + + tree *tgt = gimple_omp_for_clauses_ptr (parloop); + while (*tgt) + tgt = &OMP_CLAUSE_CHAIN (*tgt); + + /* Copy over all clauses, except for linear clauses, which are turned into + private clauses, and all other simd-specific clauses, which are + ignored. */ + tree *pc = gimple_omp_for_clauses_ptr (simd); + while (*pc) + { + tree c = *pc; + switch (OMP_CLAUSE_CODE (c)) + { + case OMP_CLAUSE_LINEAR: + { + tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); + OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); + OMP_CLAUSE_CHAIN (priv) = NULL; + *tgt = priv; + tgt = &OMP_CLAUSE_CHAIN (priv); + pc = &OMP_CLAUSE_CHAIN (c); + break; + } + + case OMP_CLAUSE_SAFELEN: + case OMP_CLAUSE_SIMDLEN: + case OMP_CLAUSE_ALIGNED: + pc = &OMP_CLAUSE_CHAIN (c); + break; + + default: + *pc = OMP_CLAUSE_CHAIN (c); + OMP_CLAUSE_CHAIN (c) = NULL; + *tgt = c; + tgt = &OMP_CLAUSE_CHAIN (c); + break; + } + } + + /* Finally, throw away the simd and mark the parallel loop as not + combined. */ + gimple_omp_set_body (parloop, gimple_omp_body (simd)); + gimple_omp_for_set_combined_p (parloop, false); +} + +/* Statement walker function marking all loops within a tiled kernel as grid + loops representing threads of a particular thread group. */ + +static tree +grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, + struct walk_stmt_info *wi_in) +{ + *handled_ops_p = false; + if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) + { + *handled_ops_p = true; + gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); + gimple_omp_for_set_grid_intra_group (loop, true); + if (gimple_omp_for_combined_p (loop)) + grid_eliminate_combined_simd_part (loop); + + struct walk_stmt_info body_wi; + memset (&body_wi, 0, sizeof (body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (loop), + grid_process_grid_body, NULL, &body_wi); + + gbind *bind = (gbind *) wi_in->info; + tree c; + for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) + { + push_gimplify_context (); + tree ov = OMP_CLAUSE_DECL (c); + tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), + TREE_TYPE (ov)); + + grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); + DECL_CONTEXT (gv) = current_function_decl; + gimple_bind_append_vars (bind, gv); + tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); + gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); + x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); + gimple_seq l = NULL; + gimplify_and_add (x, &l); + gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); + pop_gimplify_context (bind); + } + } + return NULL_TREE; +} + +/* Statement walker function marking all parallels as grid_phony and loops as + grid ones representing threads of a particular thread group. 
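Calls that are permissible in a distribute construct are rewritten here as + well, via grid_handle_call_in_distribute. 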
*/ + +static tree +grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *wi_in) +{ + *handled_ops_p = false; + wi_in->removed_stmt = false; + gimple *stmt = gsi_stmt (*gsi); + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) + grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); + } + else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) + { + *handled_ops_p = true; + gimple_omp_parallel_set_grid_phony (parallel, true); + + gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); + gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); + gimple_seq s = NULL; + gimple_seq_add_stmt (&s, new_bind); + gimple_omp_set_body (parallel, s); + + struct walk_stmt_info wi_par; + memset (&wi_par, 0, sizeof (wi_par)); + wi_par.info = new_bind; + walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), + grid_mark_tiling_loops, NULL, &wi_par); + } + else if (is_a <gcall *> (stmt)) + wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); + return NULL_TREE; +} + +/* Given freshly copied top level kernel SEQ, identify the individual OMP + components, mark them as part of the kernel, copy the assignments leading + up to them to just before DST (remapping them using WI and adding new + temporaries to TGT_BIND), and return the loop that will be used for kernel + dispatch. */ + +static gomp_for * +grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, + gimple_stmt_iterator *dst, + gbind *tgt_bind, struct walk_stmt_info *wi) +{ + gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, + GRID_SEGMENT_GLOBAL, wi); + gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); + gcc_assert (teams); + gimple_omp_teams_set_grid_phony (teams, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, + tgt_bind, GRID_SEGMENT_GLOBAL, wi); + gcc_checking_assert (stmt); + gomp_for *dist = dyn_cast <gomp_for *> (stmt); + gcc_assert (dist); + gimple_seq prebody = gimple_omp_for_pre_body (dist); + if (prebody) + grid_copy_leading_local_assignments (prebody, dst, tgt_bind, + GRID_SEGMENT_GROUP, wi); + + if (grid->tiling) + { + gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); + gimple_omp_for_set_grid_group_iter (dist, true); + + struct walk_stmt_info wi_tiled; + memset (&wi_tiled, 0, sizeof (wi_tiled)); + walk_gimple_seq_mod (gimple_omp_body_ptr (dist), + grid_mark_tiling_parallels_and_loops, NULL, + &wi_tiled); + return dist; + } + else + { + gimple_omp_for_set_grid_phony (dist, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, + tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + gcc_checking_assert (stmt); + gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); + gimple_omp_parallel_set_grid_phony (parallel, true); + stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), + dst, tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + gomp_for *inner_loop = as_a <gomp_for *> (stmt); + gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); + prebody = gimple_omp_for_pre_body (inner_loop); + if (prebody) + grid_copy_leading_local_assignments (prebody, dst, tgt_bind, + GRID_SEGMENT_PRIVATE, wi); + + if (gimple_omp_for_combined_p (inner_loop)) + grid_eliminate_combined_simd_part (inner_loop); + struct walk_stmt_info body_wi; + memset (&body_wi, 0, sizeof (body_wi)); + walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), + grid_process_grid_body, NULL, &body_wi); + + return inner_loop; + } +} + +/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, + create a GPU 
kernel for it. GSI must point to the same statement, TGT_BIND + is the bind into which temporaries inserted before TARGET should be + added. */ + +static void +grid_attempt_target_gridification (gomp_target *target, + gimple_stmt_iterator *gsi, + gbind *tgt_bind) +{ + grid_prop grid; + memset (&grid, 0, sizeof (grid)); + if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) + return; + + location_t loc = gimple_location (target); + if (dump_enabled_p ()) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, + "Target construct will be turned into a gridified HSA " + "kernel\n"); + + /* Copy target body to a GPUKERNEL construct. */ + gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals + (gimple_omp_body (target)); + + hash_map<tree, tree> *declmap = new hash_map<tree, tree>; + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (struct walk_stmt_info)); + wi.info = declmap; + + /* Copy assignments in between OMP statements before target, mark OMP + statements within copy appropriately. */ + gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, + tgt_bind, &wi); + + gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); + gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); + tree new_block = gimple_bind_block (new_bind); + tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); + BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); + BLOCK_SUBBLOCKS (enc_block) = new_block; + BLOCK_SUPERCONTEXT (new_block) = enc_block; + gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); + gimple_seq_add_stmt + (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), + gpukernel); + + for (size_t i = 0; i < grid.collapse; i++) + walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); + push_gimplify_context (); + for (size_t i = 0; i < grid.collapse; i++) + { + tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i)); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + else + itype = type; + + enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); + tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); + walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); + tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); + walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); + omp_adjust_for_condition (loc, &cond_code, &n2); + n1 = fold_convert (itype, n1); + n2 = fold_convert (itype, n2); + + tree step + = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); + + tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); + t = fold_build2 (PLUS_EXPR, itype, step, t); + t = fold_build2 (PLUS_EXPR, itype, t, n2); + t = fold_build2 (MINUS_EXPR, itype, t, n1); + if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) + t = fold_build2 (TRUNC_DIV_EXPR, itype, + fold_build1 (NEGATE_EXPR, itype, t), + fold_build1 (NEGATE_EXPR, itype, step)); + else + t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); + if (grid.tiling) + { + if (cond_code == GT_EXPR) + step = fold_build1 (NEGATE_EXPR, itype, step); + t = fold_build2 (MULT_EXPR, itype, t, step); + } + + tree gs = fold_convert (uint32_type_node, t); + gimple_seq tmpseq = NULL; + gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); + if (!gimple_seq_empty_p (tmpseq)) + gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); + + tree ws; + if (grid.group_sizes[i]) + { + ws = fold_convert (uint32_type_node, grid.group_sizes[i]); + tmpseq = NULL; + gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); + if (!gimple_seq_empty_p (tmpseq)) + gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); + } + else + ws = build_zero_cst (uint32_type_node); + + tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); + OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; + OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; + OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; + OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); + gimple_omp_target_set_clauses (target, c); + } + pop_gimplify_context (tgt_bind); + delete declmap; + return; +} + +/* Walker function doing all the work for omp_grid_gridify_all_targets. */ + +static tree +grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *incoming) +{ + *handled_ops_p = false; + + gimple *stmt = gsi_stmt (*gsi); + gomp_target *target = dyn_cast <gomp_target *> (stmt); + if (target) + { + gbind *tgt_bind = (gbind *) incoming->info; + gcc_checking_assert (tgt_bind); + grid_attempt_target_gridification (target, gsi, tgt_bind); + return NULL_TREE; + } + gbind *bind = dyn_cast <gbind *> (stmt); + if (bind) + { + *handled_ops_p = true; + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = bind; + walk_gimple_seq_mod (gimple_bind_body_ptr (bind), + grid_gridify_all_targets_stmt, NULL, &wi); + } + return NULL_TREE; +} + +/* Attempt to gridify all target constructs in BODY_P. All such targets will + have their bodies duplicated, with the new copy being put into a + gimple_omp_grid_body statement. All kernel-related constructs within the + grid_body will be marked with phony flags or kernel kinds. Moreover, some + re-structuring is often needed, such as copying pre-bodies before the target + construct so that kernel grid sizes can be computed. */ + +void +omp_grid_gridify_all_targets (gimple_seq *body_p) +{ + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); +}