From b2b40051500c944e882c274727cea7231eefaaf5 Mon Sep 17 00:00:00 2001
From: Martin Jambor
Date: Tue, 19 Jan 2016 11:35:10 +0100
Subject: Merge of HSA

2016-01-19  Martin Jambor  <mjambor@suse.cz>
            Martin Liska  <mliska@suse.cz>
            Michael Matz  <matz@suse.de>

libgomp/
        * plugin/Makefrag.am: Add HSA plugin requirements.
        * plugin/configfrag.ac (HSA_RUNTIME_INCLUDE): New variable.
        (HSA_RUNTIME_LIB): Likewise.
        (HSA_RUNTIME_CPPFLAGS): Likewise.
        (HSA_RUNTIME_INCLUDE): New substitution.
        (HSA_RUNTIME_LIB): Likewise.
        (HSA_RUNTIME_LDFLAGS): Likewise.
        (hsa-runtime): New configure option.
        (hsa-runtime-include): Likewise.
        (hsa-runtime-lib): Likewise.
        (PLUGIN_HSA): New substitution variable.
        Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new
        configure options.
        (PLUGIN_HSA_CPPFLAGS): Likewise.
        (PLUGIN_HSA_LDFLAGS): Likewise.
        (PLUGIN_HSA_LIBS): Likewise.
        Check that we have access to HSA run-time.
        * libgomp-plugin.h (offload_target_type): New element
        OFFLOAD_TARGET_TYPE_HSA.
        * libgomp.h (gomp_target_task): New fields firstprivate_copies
        and args.
        (bool gomp_create_target_task): Updated.
        (gomp_device_descr): Extra parameter of run_func and
        async_run_func, new field can_run_func.
        * libgomp_g.h (GOMP_target_ext): Update prototype.
        * oacc-host.c (host_run): Added a new parameter args.
        * target.c (calculate_firstprivate_requirements): New function.
        (copy_firstprivate_data): Likewise.
        (gomp_target_fallback_firstprivate): Use them.
        (gomp_target_unshare_firstprivate): New function.
        (gomp_get_target_fn_addr): Allow returning NULL for shared memory
        devices.
        (GOMP_target): Do host fallback for all shared memory devices.
        Do not pass any args to plugins.
        (GOMP_target_ext): Introduce device-specific argument parameter
        args.  Allow host fallback if device shares memory.  Do not remap
        data if device has shared memory.
        (gomp_target_task_fn): Likewise.  Also treat shared memory devices
        like host fallback for mappings.
        (GOMP_target_data): Treat shared memory devices like host
        fallback.
        (GOMP_target_data_ext): Likewise.
        (GOMP_target_update): Likewise.
        (GOMP_target_update_ext): Likewise.  Also pass NULL as args to
        gomp_create_target_task.
        (GOMP_target_enter_exit_data): Likewise.
        (omp_target_alloc): Treat shared memory devices like host
        fallback.
        (omp_target_free): Likewise.
        (omp_target_is_present): Likewise.
        (omp_target_memcpy): Likewise.
        (omp_target_memcpy_rect): Likewise.
        (omp_target_associate_ptr): Likewise.
        (gomp_load_plugin_for_device): Also load can_run.
        * task.c (GOMP_PLUGIN_target_task_completion): Free
        firstprivate_copies.
        (gomp_create_target_task): Accept new argument args and store it
        to ttask.
        * plugin/plugin-hsa.c: New file.

gcc/
        * Makefile.in (OBJS): Add new source files.
        (GTFILES): Add hsa.c.
        * common.opt (disable_hsa): New variable.
        (-Whsa): New warning.
        * config.in (ENABLE_HSA): New.
        * configure.ac: Treat hsa differently from other accelerators.
        (OFFLOAD_TARGETS): Define ENABLE_OFFLOADING according to
        $enable_offloading.
        (ENABLE_HSA): Define ENABLE_HSA according to $enable_hsa.
        * doc/install.texi (Configuration): Document --with-hsa-runtime,
        --with-hsa-runtime-include, --with-hsa-runtime-lib and
        --with-hsa-kmt-lib.
        * doc/invoke.texi (-Whsa): Document.
        (hsa-gen-debug-stores): Likewise.
        * lto-wrapper.c (compile_images_for_offload_targets): Do not
        attempt to invoke the offload compiler for the hsa accelerator.
        * opts.c (common_handle_option): Determine whether HSA offloading
        should be performed.
        * params.def (PARAM_HSA_GEN_DEBUG_STORES): New parameter.
        * builtin-types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New.
        (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed.
        (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New.
        * gimple-low.c (lower_stmt): Also handle GIMPLE_OMP_GRID_BODY.
        * gimple-pretty-print.c (dump_gimple_omp_for): Also handle
        GF_OMP_FOR_KIND_GRID_LOOP.
        (dump_gimple_omp_block): Also handle GIMPLE_OMP_GRID_BODY.
        (pp_gimple_stmt_1): Likewise.
        * gimple-walk.c (walk_gimple_stmt): Likewise.
        * gimple.c (gimple_build_omp_grid_body): New function.
        (gimple_copy): Also handle GIMPLE_OMP_GRID_BODY.
        * gimple.def (GIMPLE_OMP_GRID_BODY): New.
        * gimple.h (enum gf_mask): Added GF_OMP_PARALLEL_GRID_PHONY,
        GF_OMP_FOR_KIND_GRID_LOOP, GF_OMP_FOR_GRID_PHONY and
        GF_OMP_TEAMS_GRID_PHONY.
        (gimple_statement_omp_single_layout): Updated comments.
        (gimple_build_omp_grid_body): New function.
        (gimple_has_substatements): Also handle GIMPLE_OMP_GRID_BODY.
        (gimple_omp_for_grid_phony): New function.
        (gimple_omp_for_set_grid_phony): Likewise.
        (gimple_omp_parallel_grid_phony): Likewise.
        (gimple_omp_parallel_set_grid_phony): Likewise.
        (gimple_omp_teams_grid_phony): Likewise.
        (gimple_omp_teams_set_grid_phony): Likewise.
        (gimple_return_set_retbnd): Also handle GIMPLE_OMP_GRID_BODY.
        * omp-builtins.def (BUILT_IN_GOMP_OFFLOAD_REGISTER): New.
        (BUILT_IN_GOMP_OFFLOAD_UNREGISTER): Likewise.
        (BUILT_IN_GOMP_TARGET): Updated type.
        * omp-low.c: Include symbol-summary.h, hsa.h and params.h.
        (adjust_for_condition): New function.
        (get_omp_for_step_from_incr): Likewise.
        (extract_omp_for_data): Moved parts to adjust_for_condition and
        get_omp_for_step_from_incr.
        (build_outer_var_ref): Handle GIMPLE_OMP_GRID_BODY.
        (fixup_child_record_type): Bail out if receiver_decl is NULL.
        (scan_sharing_clauses): Handle OMP_CLAUSE__GRIDDIM_.
        (scan_omp_parallel): Do not create child functions for phony
        constructs.
        (check_omp_nesting_restrictions): Handle GIMPLE_OMP_GRID_BODY.
        (scan_omp_1_op): Checking assert that we are not remapping to
        ERROR_MARK.  Also handle GIMPLE_OMP_GRID_BODY.
        (parallel_needs_hsa_kernel_p): New function.
        (expand_parallel_call): Register appropriate parallel child
        functions as HSA kernels.
        (grid_launch_attributes_trees): New type.
        (grid_attr_trees): New variable.
        (grid_create_kernel_launch_attr_types): New function.
        (grid_insert_store_range_dim): Likewise.
        (grid_get_kernel_launch_attributes): Likewise.
        (get_target_argument_identifier_1): Likewise.
        (get_target_argument_identifier): Likewise.
        (get_target_argument_value): Likewise.
        (push_target_argument_according_to_value): Likewise.
        (get_target_arguments): Likewise.
        (expand_omp_target): Call get_target_arguments instead of looking
        up the teams and thread limit.
        (grid_expand_omp_for_loop): New function.
        (grid_arg_decl_map): New type.
        (grid_remap_kernel_arg_accesses): New function.
        (grid_expand_target_kernel_body): New function.
        (expand_omp): Call it.
        (lower_omp_for): Do not emit phony constructs.
        (lower_omp_taskreg): Do not emit phony constructs but create for
        them a temporary variable receiver_decl.
        (lower_omp_taskreg): Do not emit phony constructs.
        (lower_omp_teams): Likewise.
        (lower_omp_grid_body): New function.
        (lower_omp_1): Call it.
        (grid_reg_assignment_to_local_var_p): New function.
        (grid_seq_only_contains_local_assignments): Likewise.
        (grid_find_single_omp_among_assignments_1): Likewise.
        (grid_find_single_omp_among_assignments): Likewise.
        (grid_find_ungridifiable_statement): Likewise.
        (grid_target_follows_gridifiable_pattern): Likewise.
        (grid_remap_prebody_decls): Likewise.
        (grid_copy_leading_local_assignments): Likewise.
        (grid_process_kernel_body_copy): Likewise.
        (grid_attempt_target_gridification): Likewise.
        (grid_gridify_all_targets_stmt): Likewise.
        (grid_gridify_all_targets): Likewise.
        (execute_lower_omp): Call grid_gridify_all_targets.
        (make_gimple_omp_edges): Handle GIMPLE_OMP_GRID_BODY.
        * tree-core.h (omp_clause_code): Added OMP_CLAUSE__GRIDDIM_.
        (tree_omp_clause): Added union field dimension.
        * tree-pretty-print.c (dump_omp_clause): Handle
        OMP_CLAUSE__GRIDDIM_.
        * tree.c (omp_clause_num_ops): Added number of arguments of
        OMP_CLAUSE__GRIDDIM_.
        (omp_clause_code_name): Added name of OMP_CLAUSE__GRIDDIM_.
        (walk_tree_1): Handle OMP_CLAUSE__GRIDDIM_.
        * tree.h (OMP_CLAUSE_GRIDDIM_DIMENSION): New.
        (OMP_CLAUSE_SET_GRIDDIM_DIMENSION): Likewise.
        (OMP_CLAUSE_GRIDDIM_SIZE): Likewise.
        (OMP_CLAUSE_GRIDDIM_GROUP): Likewise.
        * passes.def: Schedule pass_ipa_hsa and pass_gen_hsail.
        * tree-pass.h (make_pass_gen_hsail): Declare.
        (make_pass_ipa_hsa): Likewise.
        * ipa-hsa.c: New file.
        * lto-section-in.c (lto_section_name): Add hsa section name.
        * lto-streamer.h (lto_section_type): Add hsa section.
        * timevar.def (TV_IPA_HSA): New.
        * hsa-brig-format.h: New file.
        * hsa-brig.c: New file.
        * hsa-dump.c: Likewise.
        * hsa-gen.c: Likewise.
        * hsa.c: Likewise.
        * hsa.h: Likewise.
        * toplev.c (compile_file): Call hsa_output_brig.
        * hsa-regalloc.c: New file.

gcc/fortran/
        * types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New.
        (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed.
        (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New.

gcc/lto/
        * lto-partition.c: Include "hsa.h".
        (add_symbol_to_partition_1): Put hsa implementations into the
        same partition as host implementations.

liboffloadmic/
        * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_async_run):
        New unused parameter.
        (GOMP_OFFLOAD_run): Likewise.

include/
        * gomp-constants.h (GOMP_DEVICE_HSA): New macro.
        (GOMP_VERSION_HSA): Likewise.
        (GOMP_TARGET_ARG_DEVICE_MASK): Likewise.
        (GOMP_TARGET_ARG_DEVICE_ALL): Likewise.
        (GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise.
        (GOMP_TARGET_ARG_ID_MASK): Likewise.
        (GOMP_TARGET_ARG_NUM_TEAMS): Likewise.
        (GOMP_TARGET_ARG_THREAD_LIMIT): Likewise.
        (GOMP_TARGET_ARG_VALUE_SHIFT): Likewise.
        (GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise.

From-SVN: r232549
---
 gcc/hsa-regalloc.c | 719 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 719 insertions(+)
 create mode 100644 gcc/hsa-regalloc.c
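
Note for readers: the allocator added below is the classic linear-scan
algorithm (in the Poletto/Sarkar style), specialized to the four HSAIL
register classes.  A minimal, self-contained sketch of the core loop
follows; all names are illustrative, it uses a single register class and
plain arrays in place of the per-class vectors the real code keeps:

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #define NHARD 2        /* illustrative hardware register count */
  #define SPILLED (-1)

  /* A live interval [begin, end) of one virtual register.  */
  typedef struct { int begin, end, hard; } interval;

  static int cmp_begin_i (const void *a, const void *b)
  {
    return ((const interval *) a)->begin - ((const interval *) b)->begin;
  }

  /* Assign each of the N intervals a hardreg, or SPILLED.  ACTIVE holds
     the currently live intervals, sorted by increasing end point.  */
  static void linear_scan (interval *iv, int n)
  {
    int active[NHARD], n_active = 0;
    qsort (iv, n, sizeof *iv, cmp_begin_i);
    for (int i = 0; i < n; i++)
      {
        /* Expire intervals that ended before this one starts.  */
        int k = 0;
        while (k < n_active && iv[active[k]].end <= iv[i].begin)
          k++;
        memmove (active, active + k, (n_active -= k) * sizeof (int));

        if (n_active < NHARD)
          {
            /* A hardreg is free; take the lowest one not in use.  */
            int used = 0;
            for (int j = 0; j < n_active; j++)
              used |= 1 << iv[active[j]].hard;
            iv[i].hard = __builtin_ctz (~used);
          }
        else
          {
            /* All hardregs taken: spill whichever conflicting interval
               ends last, the current one or the last active one.  */
            int last = active[--n_active];
            if (iv[last].end > iv[i].end)
              {
                iv[i].hard = iv[last].hard;
                iv[last].hard = SPILLED;
              }
            else
              {
                iv[i].hard = SPILLED;
                n_active++;        /* keep LAST in the active set */
                continue;
              }
          }
        /* Insert I into ACTIVE, keeping it sorted by end point.  */
        int p = n_active++;
        while (p > 0 && iv[active[p - 1]].end > iv[i].end)
          {
            active[p] = active[p - 1];
            p--;
          }
        active[p] = i;
      }
  }

  int main (void)
  {
    interval iv[] = { {0, 8, 0}, {1, 3, 0}, {2, 9, 0}, {4, 6, 0}, {5, 7, 0} };
    int n = sizeof iv / sizeof iv[0];
    linear_scan (iv, n);
    for (int i = 0; i < n; i++)
      if (iv[i].hard == SPILLED)
        printf ("[%d, %d) -> spill\n", iv[i].begin, iv[i].end);
      else
        printf ("[%d, %d) -> $%d\n", iv[i].begin, iv[i].end, iv[i].hard);
    return 0;
  }

The linear_scan_regalloc function in the new file has the same
expire/allocate/spill shape, but draws registers from a weighted
per-class budget (see try_alloc_reg) and attaches a spill symbol to the
interval it evicts instead of merely marking it.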
diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
new file mode 100644
index 0000000..f8e83ecf
--- /dev/null
+++ b/gcc/hsa-regalloc.c
@@ -0,0 +1,719 @@
+/* HSAIL IL Register allocation and out-of-SSA.
+   Copyright (C) 2013-2016 Free Software Foundation, Inc.
+   Contributed by Michael Matz <matz@suse.de>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "is-a.h"
+#include "vec.h"
+#include "tree.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "function.h"
+#include "bitmap.h"
+#include "dumpfile.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
+#include "hsa.h"
+
+
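+/* Example of the naive scheme implemented below: for a PHI such as
+
+     d_3 = PHI <a_1 (bb2), b_2 (bb3)>
+
+   naive_process_phi appends "mov d_3, a_1" to bb2 and "mov d_3, b_2"
+   to bb3, first splitting a predecessor's outgoing edge whenever that
+   predecessor has more than one successor (and redirecting any switch
+   jump table that referred to the old edge destination).  The PHI
+   itself is then simply dropped.  */
+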
+/* Process a PHI node PHI of basic block BB as a part of naive
+   out-of-SSA.  */
+
+static void
+naive_process_phi (hsa_insn_phi *phi)
+{
+  unsigned count = phi->operand_count ();
+  for (unsigned i = 0; i < count; i++)
+    {
+      gcc_checking_assert (phi->get_op (i));
+      hsa_op_base *op = phi->get_op (i);
+      hsa_bb *hbb;
+      edge e;
+
+      if (!op)
+        break;
+
+      e = EDGE_PRED (phi->m_bb, i);
+      if (single_succ_p (e->src))
+        hbb = hsa_bb_for_bb (e->src);
+      else
+        {
+          basic_block old_dest = e->dest;
+          hbb = hsa_init_new_bb (split_edge (e));
+
+          /* If a switch insn used this edge, fix the jump table.  */
+          hsa_bb *source = hsa_bb_for_bb (e->src);
+          hsa_insn_sbr *sbr;
+          if (source->m_last_insn
+              && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
+            sbr->replace_all_labels (old_dest, hbb->m_bb);
+        }
+
+      hsa_build_append_simple_mov (phi->m_dest, op, hbb);
+    }
+}
+
+/* Naive out-of-SSA.  */
+
+static void
+naive_outof_ssa (void)
+{
+  basic_block bb;
+
+  hsa_cfun->m_in_ssa = false;
+
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      hsa_bb *hbb = hsa_bb_for_bb (bb);
+      hsa_insn_phi *phi;
+
+      for (phi = hbb->m_first_phi;
+           phi;
+           phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
+        naive_process_phi (phi);
+
+      /* Zap the PHI nodes; they will be deallocated when everything
+         else is.  */
+      hbb->m_first_phi = NULL;
+      hbb->m_last_phi = NULL;
+    }
+}
+
+/* Return the register class number for the given HSA TYPE: 0 is the
+   one-bit 'c' class, 1 the 32-bit 's' class, 2 the 64-bit 'd' class
+   and 3 the 128-bit 'q' class.  */
+
+static int
+m_reg_class_for_type (BrigType16_t type)
+{
+  switch (type)
+    {
+    case BRIG_TYPE_B1:
+      return 0;
+
+    case BRIG_TYPE_U8:
+    case BRIG_TYPE_U16:
+    case BRIG_TYPE_U32:
+    case BRIG_TYPE_S8:
+    case BRIG_TYPE_S16:
+    case BRIG_TYPE_S32:
+    case BRIG_TYPE_F16:
+    case BRIG_TYPE_F32:
+    case BRIG_TYPE_B8:
+    case BRIG_TYPE_B16:
+    case BRIG_TYPE_B32:
+    case BRIG_TYPE_U8X4:
+    case BRIG_TYPE_S8X4:
+    case BRIG_TYPE_U16X2:
+    case BRIG_TYPE_S16X2:
+    case BRIG_TYPE_F16X2:
+      return 1;
+
+    case BRIG_TYPE_U64:
+    case BRIG_TYPE_S64:
+    case BRIG_TYPE_F64:
+    case BRIG_TYPE_B64:
+    case BRIG_TYPE_U8X8:
+    case BRIG_TYPE_S8X8:
+    case BRIG_TYPE_U16X4:
+    case BRIG_TYPE_S16X4:
+    case BRIG_TYPE_F16X4:
+    case BRIG_TYPE_U32X2:
+    case BRIG_TYPE_S32X2:
+    case BRIG_TYPE_F32X2:
+      return 2;
+
+    case BRIG_TYPE_B128:
+    case BRIG_TYPE_U8X16:
+    case BRIG_TYPE_S8X16:
+    case BRIG_TYPE_U16X8:
+    case BRIG_TYPE_S16X8:
+    case BRIG_TYPE_F16X8:
+    case BRIG_TYPE_U32X4:
+    case BRIG_TYPE_U64X2:
+    case BRIG_TYPE_S32X4:
+    case BRIG_TYPE_S64X2:
+    case BRIG_TYPE_F32X4:
+    case BRIG_TYPE_F64X2:
+      return 3;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* If the Ith operand of INSN is or contains a register (in an address),
+   return the address of that register operand.  If not, return NULL.  */
+
+static hsa_op_reg **
+insn_reg_addr (hsa_insn_basic *insn, int i)
+{
+  hsa_op_base *op = insn->get_op (i);
+  if (!op)
+    return NULL;
+  hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
+  if (reg)
+    return (hsa_op_reg **) insn->get_op_addr (i);
+  hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
+  if (addr && addr->m_reg)
+    return &addr->m_reg;
+  return NULL;
+}
+
+struct m_reg_class_desc
+{
+  unsigned next_avail, max_num;
+  unsigned used_num, max_used;
+  uint64_t used[2];
+  char cl_char;
+};
+
+/* Rewrite the instructions in BB to observe spilled live ranges.
+   CLASSES is the global register class state.  */
+
+static void
+rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
+{
+  hsa_bb *hbb = hsa_bb_for_bb (bb);
+  hsa_insn_basic *insn, *next_insn;
+
+  for (insn = hbb->m_first_insn; insn; insn = next_insn)
+    {
+      next_insn = insn->m_next;
+      unsigned count = insn->operand_count ();
+      for (unsigned i = 0; i < count; i++)
+        {
+          gcc_checking_assert (insn->get_op (i));
+          hsa_op_reg **regaddr = insn_reg_addr (insn, i);
+
+          if (regaddr)
+            {
+              hsa_op_reg *reg = *regaddr;
+              if (reg->m_reg_class)
+                continue;
+              gcc_assert (reg->m_spill_sym);
+
+              int cl = m_reg_class_for_type (reg->m_type);
+              hsa_op_reg *tmp, *tmp2;
+              if (insn->op_output_p (i))
+                tmp = hsa_spill_out (insn, reg, &tmp2);
+              else
+                tmp = hsa_spill_in (insn, reg, &tmp2);
+
+              *regaddr = tmp;
+
+              tmp->m_reg_class = classes[cl].cl_char;
+              tmp->m_hard_num = (char) (classes[cl].max_num + i);
+              if (tmp2)
+                {
+                  gcc_assert (cl == 0);
+                  tmp2->m_reg_class = classes[1].cl_char;
+                  tmp2->m_hard_num = (char) (classes[1].max_num + i);
+                }
+            }
+        }
+    }
+}
+
+/* Dump the current function to dump file F, with info specific
+   to register allocation.  */
+
+void
+dump_hsa_cfun_regalloc (FILE *f)
+{
+  basic_block bb;
+
+  fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
+
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      hsa_bb *hbb = (struct hsa_bb *) bb->aux;
+      bitmap_print (f, hbb->m_livein, "m_livein  ", "\n");
+      dump_hsa_bb (f, hbb);
+      bitmap_print (f, hbb->m_liveout, "m_liveout ", "\n");
+    }
+}
+
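+/* HSAIL's 's', 'd' and 'q' registers are carved out of one shared
+   pool of 128 32-bit words, so s + 2*d + 4*q must not exceed 128.
+   try_alloc_reg checks that budget globally and keeps a few words in
+   reserve (the "128 - 5" test below) so that the temporaries that
+   rewrite_code_bb later creates for spilled ranges, numbered just
+   past each class's max_num, still fit.  */
+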
+/* Given the global register allocation state CLASSES and a
+   register REG, try to give it a hardware register.  If successful,
+   store that hardreg in REG and return it, otherwise return -1.
+   Also changes CLASSES to account for the allocated register.  */
+
+static int
+try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
+{
+  int cl = m_reg_class_for_type (reg->m_type);
+  int ret = -1;
+  if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
+      >= 128 - 5)
+    return -1;
+  if (classes[cl].used_num < classes[cl].max_num)
+    {
+      unsigned int i;
+      classes[cl].used_num++;
+      if (classes[cl].used_num > classes[cl].max_used)
+        classes[cl].max_used = classes[cl].used_num;
+      for (i = 0; i < classes[cl].used_num; i++)
+        if (! (classes[cl].used[i / 64] & (((uint64_t) 1) << (i & 63))))
+          break;
+      ret = i;
+      classes[cl].used[i / 64] |= (((uint64_t) 1) << (i & 63));
+      reg->m_reg_class = classes[cl].cl_char;
+      reg->m_hard_num = i;
+    }
+  return ret;
+}
+
+/* Return the hardreg used by REG to the allocation state CLASSES.  */
+
+static void
+free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
+{
+  int cl = m_reg_class_for_type (reg->m_type);
+  int ret = reg->m_hard_num;
+  gcc_assert (reg->m_reg_class == classes[cl].cl_char);
+  classes[cl].used_num--;
+  classes[cl].used[ret / 64] &= ~(((uint64_t) 1) << (ret & 63));
+}
+
+/* Note that the live range for REG ends at least at END.  */
+
+static void
+note_lr_end (hsa_op_reg *reg, int end)
+{
+  if (reg->m_lr_end < end)
+    reg->m_lr_end = end;
+}
+
+/* Note that the live range for REG starts at least at BEGIN.  */
+
+static void
+note_lr_begin (hsa_op_reg *reg, int begin)
+{
+  if (reg->m_lr_begin > begin)
+    reg->m_lr_begin = begin;
+}
+
+/* Given two registers A and B, return -1, 0 or 1 if A's live range
+   starts before, at or after B's live range.  */
+
+static int
+cmp_begin (const void *a, const void *b)
+{
+  const hsa_op_reg * const *rega = (const hsa_op_reg * const *) a;
+  const hsa_op_reg * const *regb = (const hsa_op_reg * const *) b;
+  int ret;
+  if (rega == regb)
+    return 0;
+  ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
+  if (ret)
+    return ret;
+  return ((*rega)->m_order - (*regb)->m_order);
+}
+
+/* Given two registers REGA and REGB, return true if REGA's
+   live range ends after REGB's.  This results in a sorting order
+   with earlier end points at the end.  */
+
+static bool
+cmp_end (hsa_op_reg * const &rega, hsa_op_reg * const &regb)
+{
+  int ret;
+  if (rega == regb)
+    return false;
+  ret = (regb)->m_lr_end - (rega)->m_lr_end;
+  if (ret)
+    return ret < 0;
+  return (((regb)->m_order - (rega)->m_order)) < 0;
+}
+
+/* Expire all old intervals in ACTIVE (a per-regclass vector),
+   that is, those that end before the interval REG starts.  Return
+   the resources thus freed to the state CLASSES.  */
+
+static void
+expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
+                      struct m_reg_class_desc *classes)
+{
+  for (int i = 0; i < 4; i++)
+    while (!active[i].is_empty ())
+      {
+        hsa_op_reg *a = active[i].pop ();
+        if (a->m_lr_end > reg->m_lr_begin)
+          {
+            active[i].quick_push (a);
+            break;
+          }
+        free_reg (classes, a);
+      }
+}
+
+/* The interval REG didn't get a hardreg.  Spill it or one of those
+   from ACTIVE (in the latter case REG takes over the hardreg the
+   spilled interval was using).  */
+
+static void
+spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
+{
+  int cl = m_reg_class_for_type (reg->m_type);
+  gcc_assert (!active[cl].is_empty ());
+  hsa_op_reg *cand = active[cl][0];
+  if (cand->m_lr_end > reg->m_lr_end)
+    {
+      reg->m_reg_class = cand->m_reg_class;
+      reg->m_hard_num = cand->m_hard_num;
+      active[cl].ordered_remove (0);
+      unsigned place = active[cl].lower_bound (reg, cmp_end);
+      active[cl].quick_insert (place, reg);
+    }
+  else
+    cand = reg;
+
+  gcc_assert (!cand->m_spill_sym);
+  BrigType16_t type = cand->m_type;
+  if (type == BRIG_TYPE_B1)
+    type = BRIG_TYPE_U8;
+  cand->m_reg_class = 0;
+  cand->m_spill_sym = hsa_get_spill_symbol (type);
+  cand->m_spill_sym->m_name_number = cand->m_order;
+}
+
+/* Given the global register state CLASSES, allocate all HSA virtual
+   registers either to hardregs or to a spill symbol.  */
+
+static void
+linear_scan_regalloc (struct m_reg_class_desc *classes)
+{
+  /* Compute liveness.  */
+  bool changed;
+  int i, n;
+  int insn_order;
+  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
+  bitmap work = BITMAP_ALLOC (NULL);
+  vec<hsa_op_reg*> ind2reg = vNULL;
+  vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
+  hsa_insn_basic *m_last_insn;
+
+  /* We will need the reverse post order for linearization,
+     and the post order for liveness analysis, which is the same
+     backward.  */
+  n = pre_and_rev_post_order_compute (NULL, bbs, true);
+  ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);
+
+  /* Give all instructions a linearized number, at the same time
+     build a mapping from register index to register.  */
+  insn_order = 1;
+  for (i = 0; i < n; i++)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
+      hsa_bb *hbb = hsa_bb_for_bb (bb);
+      hsa_insn_basic *insn;
+      for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
+        {
+          unsigned opi;
+          insn->m_number = insn_order++;
+          for (opi = 0; opi < insn->operand_count (); opi++)
+            {
+              gcc_checking_assert (insn->get_op (opi));
+              hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
+              if (regaddr)
+                ind2reg[(*regaddr)->m_order] = *regaddr;
+            }
+        }
+    }
+
+  /* Initialize all live ranges to [after-end, 0).  */
+  for (i = 0; i < hsa_cfun->m_reg_count; i++)
+    if (ind2reg[i])
+      ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;
+
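+  /* Live ranges are half-open intervals [m_lr_begin, m_lr_end) over
+     the linear numbering just assigned; starting from the empty
+     extreme [after-end, 0), note_lr_begin and note_lr_end only ever
+     widen an interval.  The bitmaps computed next tell which registers
+     must be widened across block boundaries.  */
+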
+  /* Classic liveness analysis, as long as something changes:
+       m_liveout is union (m_livein of successors)
+       m_livein is m_liveout minus defs plus uses.  */
+  do
+    {
+      changed = false;
+      for (i = n - 1; i >= 0; i--)
+        {
+          edge e;
+          edge_iterator ei;
+          basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
+          hsa_bb *hbb = hsa_bb_for_bb (bb);
+
+          /* Union of successors' m_livein (or empty if none).  */
+          bool first = true;
+          FOR_EACH_EDGE (e, ei, bb->succs)
+            if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
+              {
+                hsa_bb *succ = hsa_bb_for_bb (e->dest);
+                if (first)
+                  {
+                    bitmap_copy (work, succ->m_livein);
+                    first = false;
+                  }
+                else
+                  bitmap_ior_into (work, succ->m_livein);
+              }
+          if (first)
+            bitmap_clear (work);
+
+          bitmap_copy (hbb->m_liveout, work);
+
+          /* Remove defs, include uses in a backward insn walk.  */
+          hsa_insn_basic *insn;
+          for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
+            {
+              unsigned opi;
+              unsigned ndefs = insn->input_count ();
+              for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
+                {
+                  gcc_checking_assert (insn->get_op (opi));
+                  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
+                  if (regaddr)
+                    bitmap_clear_bit (work, (*regaddr)->m_order);
+                }
+              for (; opi < insn->operand_count (); opi++)
+                {
+                  gcc_checking_assert (insn->get_op (opi));
+                  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
+                  if (regaddr)
+                    bitmap_set_bit (work, (*regaddr)->m_order);
+                }
+            }
+
+          /* Note if that changed something.  */
+          if (bitmap_ior_into (hbb->m_livein, work))
+            changed = true;
+        }
+    }
+  while (changed);
+
+  /* Make one pass through all instructions in linear order,
+     noting and merging possible live range start and end points.  */
+  m_last_insn = NULL;
+  for (i = n - 1; i >= 0; i--)
+    {
+      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
+      hsa_bb *hbb = hsa_bb_for_bb (bb);
+      hsa_insn_basic *insn;
+      int after_end_number;
+      unsigned bit;
+      bitmap_iterator bi;
+
+      if (m_last_insn)
+        after_end_number = m_last_insn->m_number;
+      else
+        after_end_number = insn_order;
+      /* Everything live-out in this BB has at least an end point
+         after us.  */
+      EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
+        note_lr_end (ind2reg[bit], after_end_number);
+
+      for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
+        {
+          unsigned opi;
+          unsigned ndefs = insn->input_count ();
+          for (opi = 0; opi < insn->operand_count (); opi++)
+            {
+              gcc_checking_assert (insn->get_op (opi));
+              hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
+              if (regaddr)
+                {
+                  hsa_op_reg *reg = *regaddr;
+                  if (opi < ndefs)
+                    note_lr_begin (reg, insn->m_number);
+                  else
+                    note_lr_end (reg, insn->m_number);
+                }
+            }
+        }
+
+      /* Everything live-in in this BB has a start point before
+         our first insn.  */
+      int before_start_number;
+      if (hbb->m_first_insn)
+        before_start_number = hbb->m_first_insn->m_number;
+      else
+        before_start_number = after_end_number;
+      before_start_number--;
+      EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
+        note_lr_begin (ind2reg[bit], before_start_number);
+
+      if (hbb->m_first_insn)
+        m_last_insn = hbb->m_first_insn;
+    }
+
+  for (i = 0; i < hsa_cfun->m_reg_count; i++)
+    if (ind2reg[i])
+      {
+        /* Registers whose start still lies after all code are in fact
+           defined at the start of the routine (prologue).  */
+        if (ind2reg[i]->m_lr_begin == insn_order)
+          ind2reg[i]->m_lr_begin = 0;
+        /* Registers with a def but no use will have lr_end == 0;
+           they are actually live from the def until after the insn
+           they are defined in.  */
+        if (ind2reg[i]->m_lr_end == 0)
+          ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
+      }
+
+  /* Sort all intervals by increasing start point.  */
+  gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);
+
+#ifdef ENABLE_CHECKING
+  for (unsigned i = 0; i < ind2reg.length (); i++)
+    gcc_assert (ind2reg[i]);
+#endif
+
+  ind2reg.qsort (cmp_begin);
+  for (i = 0; i < 4; i++)
+    active[i].reserve_exact (hsa_cfun->m_reg_count);
+
+  /* Now comes the linear scan allocation.  */
+  for (i = 0; i < hsa_cfun->m_reg_count; i++)
+    {
+      hsa_op_reg *reg = ind2reg[i];
+      if (!reg)
+        continue;
+      expire_old_intervals (reg, active, classes);
+      int cl = m_reg_class_for_type (reg->m_type);
+      if (try_alloc_reg (classes, reg) >= 0)
+        {
+          unsigned place = active[cl].lower_bound (reg, cmp_end);
+          active[cl].quick_insert (place, reg);
+        }
+      else
+        spill_at_interval (reg, active);
+
+      /* Some interesting dumping as we go.  */
+      if (dump_file)
+        {
+          fprintf (dump_file, "  reg%d: [%5d, %5d)->",
+                   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
+          if (reg->m_reg_class)
+            fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
+          else
+            fprintf (dump_file, "[%%__%s_%i]",
+                     hsa_seg_name (reg->m_spill_sym->m_segment),
+                     reg->m_spill_sym->m_name_number);
+          for (int cl = 0; cl < 4; cl++)
+            {
+              bool first = true;
+              hsa_op_reg *r;
+              fprintf (dump_file, " {");
+              for (int j = 0; active[cl].iterate (j, &r); j++)
+                if (first)
+                  {
+                    fprintf (dump_file, "%d", r->m_order);
+                    first = false;
+                  }
+                else
+                  fprintf (dump_file, ", %d", r->m_order);
+              fprintf (dump_file, "}");
+            }
+          fprintf (dump_file, "\n");
+        }
+    }
+
+  BITMAP_FREE (work);
+  free (bbs);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "------- After liveness: -------\n");
+      dump_hsa_cfun_regalloc (dump_file);
+      fprintf (dump_file, "  ----- Intervals:\n");
+      for (i = 0; i < hsa_cfun->m_reg_count; i++)
+        {
+          hsa_op_reg *reg = ind2reg[i];
+          if (!reg)
+            continue;
+          fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
+                   reg->m_lr_begin, reg->m_lr_end);
+          if (reg->m_reg_class)
+            fprintf (dump_file, "$%c%i\n", reg->m_reg_class,
+                     reg->m_hard_num);
+          else
+            fprintf (dump_file, "[%%__%s_%i]\n",
+                     hsa_seg_name (reg->m_spill_sym->m_segment),
+                     reg->m_spill_sym->m_name_number);
+        }
+    }
+
+  for (i = 0; i < 4; i++)
+    active[i].release ();
+  ind2reg.release ();
+}
+
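+/* HSAIL offers eight one-bit 'c' registers plus the shared s/d/q pool
+   described in the comment above try_alloc_reg.  The limits configured
+   in regalloc below (7 'c' registers and at most 20 registers in each
+   other class) stay under the architectural maxima so that the spill
+   temporaries rewrite_code_bb numbers from max_num upwards still
+   fit.  */
+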
+/* Entry point for register allocation.  */
+
+static void
+regalloc (void)
+{
+  basic_block bb;
+  m_reg_class_desc classes[4];
+
+  /* If there are no registers used in the function, exit right away.  */
+  if (hsa_cfun->m_reg_count == 0)
+    return;
+
+  memset (classes, 0, sizeof (classes));
+  classes[0].next_avail = 0;
+  classes[0].max_num = 7;
+  classes[0].cl_char = 'c';
+  classes[1].cl_char = 's';
+  classes[2].cl_char = 'd';
+  classes[3].cl_char = 'q';
+
+  for (int i = 1; i < 4; i++)
+    {
+      classes[i].next_avail = 0;
+      classes[i].max_num = 20;
+    }
+
+  linear_scan_regalloc (classes);
+
+  FOR_ALL_BB_FN (bb, cfun)
+    rewrite_code_bb (bb, classes);
+}
+
+/* Out of SSA and register allocation on HSAIL IL.  */
+
+void
+hsa_regalloc (void)
+{
+  naive_outof_ssa ();
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "------- After out-of-SSA: -------\n");
+      dump_hsa_cfun (dump_file);
+    }
+
+  regalloc ();
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "------- After register allocation: -------\n");
+      dump_hsa_cfun (dump_file);
+    }
+}