60 files changed, 18448 insertions, 197 deletions
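For orientation before the diff itself: this patch teaches GCC to "gridify" suitable OpenMP target regions, i.e. to lower them directly into an HSA GPU kernel (a new GIMPLE_OMP_GRID_BODY statement) instead of going through generic libgomp loop scheduling. A minimal sketch of the kind of construct the new grid_* machinery in omp-low.c is meant to match follows; the example is illustrative only and not code from the patch (the precise matching rules live in grid_target_follows_gridifiable_pattern below).

/* Illustrative candidate for HSA gridification: the iteration space of the
   loop can be mapped one-to-one onto an HSA grid of work-items.  */
void
vec_add (int n, const float *a, const float *b, float *c)
{
#pragma omp target teams distribute parallel for
  for (int i = 0; i < n; i++)
    c[i] = a[i] + b[i];
}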
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index b39f864..907a528 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,135 @@ +2016-01-19 Martin Jambor <mjambor@suse.cz> + Martin Liska <mliska@suse.cz> + Michael Matz <matz@suse.de> + + * Makefile.in (OBJS): Add new source files. + (GTFILES): Add hsa.c. + * common.opt (disable_hsa): New variable. + (-Whsa): New warning. + * config.in (ENABLE_HSA): New. + * configure.ac: Treat hsa differently from other accelerators. + (OFFLOAD_TARGETS): Define ENABLE_OFFLOADING according to + $enable_offloading. + (ENABLE_HSA): Define ENABLE_HSA according to $enable_hsa. + * doc/install.texi (Configuration): Document --with-hsa-runtime, + --with-hsa-runtime-include, --with-hsa-runtime-lib and + --with-hsa-kmt-lib. + * doc/invoke.texi (-Whsa): Document. + (hsa-gen-debug-stores): Likewise. + * lto-wrapper.c (compile_images_for_offload_targets): Do not attempt + to invoke offload compiler for hsa accelerator. + * opts.c (common_handle_option): Determine whether HSA offloading + should be performed. + * params.def (PARAM_HSA_GEN_DEBUG_STORES): New parameter. + * builtin-types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. + (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. + (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New. + * gimple-low.c (lower_stmt): Also handle GIMPLE_OMP_GRID_BODY. + * gimple-pretty-print.c (dump_gimple_omp_for): Also handle + GF_OMP_FOR_KIND_GRID_LOOP. + (dump_gimple_omp_block): Also handle GIMPLE_OMP_GRID_BODY. + (pp_gimple_stmt_1): Likewise. + * gimple-walk.c (walk_gimple_stmt): Likewise. + * gimple.c (gimple_build_omp_grid_body): New function. + (gimple_copy): Also handle GIMPLE_OMP_GRID_BODY. + * gimple.def (GIMPLE_OMP_GRID_BODY): New. + * gimple.h (enum gf_mask): Added GF_OMP_PARALLEL_GRID_PHONY, + GF_OMP_FOR_KIND_GRID_LOOP, GF_OMP_FOR_GRID_PHONY and + GF_OMP_TEAMS_GRID_PHONY. + (gimple_statement_omp_single_layout): Updated comments. + (gimple_build_omp_grid_body): New function. + (gimple_has_substatements): Also handle GIMPLE_OMP_GRID_BODY. + (gimple_omp_for_grid_phony): New function. + (gimple_omp_for_set_grid_phony): Likewise. + (gimple_omp_parallel_grid_phony): Likewise. + (gimple_omp_parallel_set_grid_phony): Likewise. + (gimple_omp_teams_grid_phony): Likewise. + (gimple_omp_teams_set_grid_phony): Likewise. + (gimple_return_set_retbnd): Also handle GIMPLE_OMP_GRID_BODY. + * omp-builtins.def (BUILT_IN_GOMP_OFFLOAD_REGISTER): New. + (BUILT_IN_GOMP_OFFLOAD_UNREGISTER): Likewise. + (BUILT_IN_GOMP_TARGET): Updated type. + * omp-low.c: Include symbol-summary.h, hsa.h and params.h. + (adjust_for_condition): New function. + (get_omp_for_step_from_incr): Likewise. + (extract_omp_for_data): Moved parts to adjust_for_condition and + get_omp_for_step_from_incr. + (build_outer_var_ref): Handle GIMPLE_OMP_GRID_BODY. + (fixup_child_record_type): Bail out if receiver_decl is NULL. + (scan_sharing_clauses): Handle OMP_CLAUSE__GRIDDIM_. + (scan_omp_parallel): Do not create child functions for phony + constructs. + (check_omp_nesting_restrictions): Handle GIMPLE_OMP_GRID_BODY. + (scan_omp_1_op): Add a checking assert that we are not remapping to + ERROR_MARK. Also handle GIMPLE_OMP_GRID_BODY. + (parallel_needs_hsa_kernel_p): New function. + (expand_parallel_call): Register appropriate parallel child + functions as HSA kernels. + (grid_launch_attributes_trees): New type. + (grid_attr_trees): New variable. + (grid_create_kernel_launch_attr_types): New function. + (grid_insert_store_range_dim): Likewise.
+ (grid_get_kernel_launch_attributes): Likewise. + (get_target_argument_identifier_1): Likewise. + (get_target_argument_identifier): Likewise. + (get_target_argument_value): Likewise. + (push_target_argument_according_to_value): Likewise. + (get_target_arguments): Likewise. + (expand_omp_target): Call get_target_arguments instead of looking + up teams and thread limit. + (grid_expand_omp_for_loop): New function. + (grid_arg_decl_map): New type. + (grid_remap_kernel_arg_accesses): New function. + (grid_expand_target_kernel_body): New function. + (expand_omp): Call it. + (lower_omp_for): Do not emit phony constructs. + (lower_omp_taskreg): Do not emit phony constructs but create for them + a temporary variable receiver_decl. + (lower_omp_taskreg): Do not emit phony constructs. + (lower_omp_teams): Likewise. + (lower_omp_grid_body): New function. + (lower_omp_1): Call it. + (grid_reg_assignment_to_local_var_p): New function. + (grid_seq_only_contains_local_assignments): Likewise. + (grid_find_single_omp_among_assignments_1): Likewise. + (grid_find_single_omp_among_assignments): Likewise. + (grid_find_ungridifiable_statement): Likewise. + (grid_target_follows_gridifiable_pattern): Likewise. + (grid_remap_prebody_decls): Likewise. + (grid_copy_leading_local_assignments): Likewise. + (grid_process_kernel_body_copy): Likewise. + (grid_attempt_target_gridification): Likewise. + (grid_gridify_all_targets_stmt): Likewise. + (grid_gridify_all_targets): Likewise. + (execute_lower_omp): Call grid_gridify_all_targets. + (make_gimple_omp_edges): Handle GIMPLE_OMP_GRID_BODY. + * tree-core.h (omp_clause_code): Added OMP_CLAUSE__GRIDDIM_. + (tree_omp_clause): Added union field dimension. + * tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE__GRIDDIM_. + * tree.c (omp_clause_num_ops): Added number of arguments of + OMP_CLAUSE__GRIDDIM_. + (omp_clause_code_name): Added name of OMP_CLAUSE__GRIDDIM_. + (walk_tree_1): Handle OMP_CLAUSE__GRIDDIM_. + * tree.h (OMP_CLAUSE_GRIDDIM_DIMENSION): New. + (OMP_CLAUSE_SET_GRIDDIM_DIMENSION): Likewise. + (OMP_CLAUSE_GRIDDIM_SIZE): Likewise. + (OMP_CLAUSE_GRIDDIM_GROUP): Likewise. + * passes.def: Schedule pass_ipa_hsa and pass_gen_hsail. + * tree-pass.h (make_pass_gen_hsail): Declare. + (make_pass_ipa_hsa): Likewise. + * ipa-hsa.c: New file. + * lto-section-in.c (lto_section_name): Add hsa section name. + * lto-streamer.h (lto_section_type): Add hsa section. + * timevar.def (TV_IPA_HSA): New. + * hsa-brig-format.h: New file. + * hsa-brig.c: New file. + * hsa-dump.c: Likewise. + * hsa-gen.c: Likewise. + * hsa.c: Likewise. + * hsa.h: Likewise. + * toplev.c (compile_file): Call hsa_output_brig. + * hsa-regalloc.c: New file.
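The ChangeLog above lists several new GIMPLE builders and "phony" flag accessors whose interplay is easier to see in code. A hedged sketch follows (a hypothetical helper, not code from the patch): gridification marks the original OMP statements as phony so that OMP lowering emits no real construct for them, and wraps the kernel statements in the new GIMPLE_OMP_GRID_BODY node.

/* Hypothetical sketch only, composing the builders/accessors this patch
   declares.  Marking the loop phony makes lower_omp_for emit no actual
   construct for it; the body sequence becomes the grid kernel.  */
static gimple *
wrap_in_grid_body (gimple_seq kernel_seq, gomp_for *inner_loop)
{
  gimple_omp_for_set_grid_phony (inner_loop, true);
  return gimple_build_omp_grid_body (kernel_seq);
}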
+ 2016-01-18 Jeff Law <law@redhat.com> PR tree-optimization/69320 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 44a18eb..ab9cbbf 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1297,6 +1297,11 @@ OBJS = \ graphite-sese-to-poly.o \ gtype-desc.o \ haifa-sched.o \ + hsa.o \ + hsa-gen.o \ + hsa-regalloc.o \ + hsa-brig.o \ + hsa-dump.o \ hw-doloop.o \ hwint.o \ ifcvt.o \ @@ -1321,6 +1326,7 @@ OBJS = \ ipa-icf.o \ ipa-icf-gimple.o \ ipa-reference.o \ + ipa-hsa.o \ ipa-ref.o \ ipa-utils.o \ ipa.o \ @@ -2404,6 +2410,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/sancov.c \ $(srcdir)/ipa-devirt.c \ $(srcdir)/internal-fn.h \ + $(srcdir)/hsa.c \ @all_gtfiles@ # Compute the list of GT header files from the corresponding C sources, diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def index c3b6e13..7fab9f8 100644 --- a/gcc/builtin-types.def +++ b/gcc/builtin-types.def @@ -478,6 +478,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR, DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR, BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR, + BT_INT, BT_PTR) DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG, BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING, @@ -555,10 +557,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG, BT_BOOL, BT_UINT, BT_PTR, BT_INT) - -DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, - BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, - BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT) +DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, + BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, + BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, diff --git a/gcc/common.opt b/gcc/common.opt index 49d347c..23e6ed7 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -239,6 +239,10 @@ Inserts call to __sanitizer_cov_trace_pc into every basic block. Variable bool dump_base_name_prefixed = false +; Flag whether HSA generation has been explicitly disabled +Variable +bool flag_disable_hsa = false + ### Driver @@ -593,6 +597,10 @@ Wfree-nonheap-object Common Var(warn_free_nonheap_object) Init(1) Warning Warn when attempting to free a non-heap object. +Whsa +Common Var(warn_hsa) Init(1) Warning +Warn when a function cannot be expanded to HSAIL. + Winline Common Var(warn_inline) Warning Warn when an inlined function cannot be inlined. diff --git a/gcc/config.in b/gcc/config.in index c00cd0f..c3340bb0 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -144,6 +144,12 @@ #endif +/* Define this to enable support for generating HSAIL. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_HSA +#endif + + /* Define if gcc should always pass --build-id to linker.
*/ #ifndef USED_FOR_TARGET #undef ENABLE_LD_BUILDID diff --git a/gcc/configure b/gcc/configure index 6eca483..7db7552 100755 --- a/gcc/configure +++ b/gcc/configure @@ -7700,6 +7700,13 @@ fi for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do tgt=`echo $tgt | sed 's/=.*//'` + + if echo "$tgt" | grep "^hsa" > /dev/null ; then + enable_hsa=1 + else + enable_offloading=1 + fi + if test x"$offload_targets" = x; then offload_targets=$tgt else @@ -7711,7 +7718,7 @@ cat >>confdefs.h <<_ACEOF #define OFFLOAD_TARGETS "$offload_targets" _ACEOF -if test x"$offload_targets" != x; then +if test x"$enable_offloading" != x; then $as_echo "#define ENABLE_OFFLOADING 1" >>confdefs.h @@ -7721,6 +7728,12 @@ $as_echo "#define ENABLE_OFFLOADING 0" >>confdefs.h fi +if test x"$enable_hsa" = x1 ; then + +$as_echo "#define ENABLE_HSA 1" >>confdefs.h + +fi + # Check whether --with-multilib-list was given. if test "${with_multilib_list+set}" = set; then : @@ -18406,7 +18419,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 18409 "configure" +#line 18422 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -18512,7 +18525,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 18515 "configure" +#line 18528 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/gcc/configure.ac b/gcc/configure.ac index 0a626e9..8d3a869 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -940,6 +940,13 @@ AC_SUBST(accel_dir_suffix) for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do tgt=`echo $tgt | sed 's/=.*//'` + + if echo "$tgt" | grep "^hsa" > /dev/null ; then + enable_hsa=1 + else + enable_offloading=1 + fi + if test x"$offload_targets" = x; then offload_targets=$tgt else @@ -948,7 +955,7 @@ for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do done AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets", [Define to offload targets, separated by commas.]) -if test x"$offload_targets" != x; then +if test x"$enable_offloading" != x; then AC_DEFINE(ENABLE_OFFLOADING, 1, [Define this to enable support for offloading.]) else @@ -956,6 +963,11 @@ else [Define this to enable support for offloading.]) fi +if test x"$enable_hsa" = x1 ; then + AC_DEFINE(ENABLE_HSA, 1, + [Define this to enable support for generating HSAIL.]) +fi + AC_ARG_WITH(multilib-list, [AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and x86-64 only)])], :, diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 1c2b702..062f42c 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -1992,6 +1992,28 @@ specifying paths @var{path1}, @dots{}, @var{pathN}. % @var{srcdir}/configure \ --enable-offload-target=i686-unknown-linux-gnu=/path/to/i686/compiler,x86_64-pc-linux-gnu @end smallexample + +If @samp{hsa} is specified as one of the targets, the compiler will be +built with support for HSA GPU accelerators. Because the same +compiler will emit the accelerator code, no path should be specified. + +@item --with-hsa-runtime=@var{pathname} +@itemx --with-hsa-runtime-include=@var{pathname} +@itemx --with-hsa-runtime-lib=@var{pathname} + +If you configure GCC with HSA offloading but do not have the HSA +run-time library installed in a standard location, then you can +explicitly specify the directory where it is installed.
The +@option{--with-hsa-runtime=@/@var{hsainstalldir}} option is a +shorthand for +@option{--with-hsa-runtime-lib=@/@var{hsainstalldir}/lib} and +@option{--with-hsa-runtime-include=@/@var{hsainstalldir}/include}. + +@item --with-hsa-kmt-lib=@var{pathname} + +If you configure GCC with HSA offloading but do not have the HSA +KMT library installed in a standard location, then you can +explicitly specify the directory where it resides. @end table @subheading Cross-Compiler-Specific Options diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a1debf1..077324b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -305,7 +305,7 @@ Objective-C and Objective-C++ Dialects}. -Wunused-but-set-parameter -Wunused-but-set-variable @gol -Wuseless-cast -Wvariadic-macros -Wvector-operation-performance @gol -Wvla -Wvolatile-register-var -Wwrite-strings @gol --Wzero-as-null-pointer-constant} +-Wzero-as-null-pointer-constant -Whsa} @item C and Objective-C-only Warning Options @gccoptlist{-Wbad-function-cast -Wmissing-declarations @gol @@ -5693,6 +5693,10 @@ Suppress warnings when a positional initializer is used to initialize a structure that has been marked with the @code{designated_init} attribute. +@item -Whsa +Issue a warning when HSAIL cannot be emitted for the compiled function or +OpenMP construct. + @end table @node Debugging Options @@ -9508,6 +9512,12 @@ dynamic, guided, auto, runtime). The default is static. Maximum depth of recursion when querying properties of SSA names in things like fold routines. One level of recursion corresponds to following a use-def chain. + +@item hsa-gen-debug-stores +Enable emission of special debug stores within HSA kernels which are +then read and reported by the libgomp plugin. Generation of these stores +is disabled by default; use @option{--param hsa-gen-debug-stores=1} to +enable it. @end table @end table diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 5ad05ce..ee83c2e 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2016-01-19 Martin Jambor <mjambor@suse.cz> + + * types.def (BT_FN_VOID_UINT_PTR_INT_PTR): New. + (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT): Removed. + (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New.
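To make the types.def changes concrete: each BT_FN_* name encodes a C prototype, return type first. A sketch of the prototypes behind the two entries this patch adds; parameter names are invented for illustration, and note that the hunks declare the first argument of BT_FN_VOID_UINT_PTR_INT_PTR as BT_INT even though the name says UINT.

/* BT_FN_VOID_UINT_PTR_INT_PTR: the shape used for the new
   BUILT_IN_GOMP_OFFLOAD_REGISTER/UNREGISTER builtins.  */
void offload_register (unsigned version, void *host_table, int target_type,
		       void *target_data);

/* BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR: the updated type of
   BUILT_IN_GOMP_TARGET -- the two trailing ints of the removed 10-argument
   type are replaced by one pointer to a block of target arguments.  */
void gomp_target (int device, void (*fn) (void *), size_t mapnum,
		  void *hostaddrs, void *sizes, void *kinds,
		  unsigned flags, void *depend, void *args);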
+ 2016-01-15 Paul Thomas <pault@gcc.gnu.org> PR fortran/64324 diff --git a/gcc/fortran/types.def b/gcc/fortran/types.def index 43a1b46..8780b76 100644 --- a/gcc/fortran/types.def +++ b/gcc/fortran/types.def @@ -159,6 +159,8 @@ DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_LONGPTR_LONGPTR_LONGPTR, DEF_FUNCTION_TYPE_4 (BT_FN_BOOL_UINT_ULLPTR_ULLPTR_ULLPTR, BT_BOOL, BT_UINT, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG) +DEF_FUNCTION_TYPE_4 (BT_FN_VOID_UINT_PTR_INT_PTR, BT_VOID, BT_INT, BT_PTR, + BT_INT, BT_PTR) DEF_FUNCTION_TYPE_5 (BT_FN_VOID_OMPFN_PTR_UINT_UINT_UINT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT, @@ -220,10 +222,9 @@ DEF_FUNCTION_TYPE_9 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR_INT, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG, BT_BOOL, BT_UINT, BT_PTR, BT_INT) - -DEF_FUNCTION_TYPE_10 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, +DEF_FUNCTION_TYPE_9 (BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_SIZE, BT_PTR, - BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_INT, BT_INT) + BT_PTR, BT_PTR, BT_UINT, BT_PTR, BT_PTR) DEF_FUNCTION_TYPE_11 (BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_UINT_LONG_INT_LONG_LONG_LONG, BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, diff --git a/gcc/gimple-low.c b/gcc/gimple-low.c index f9b0fba..eb90d48 100644 --- a/gcc/gimple-low.c +++ b/gcc/gimple-low.c @@ -358,6 +358,7 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data *data) case GIMPLE_OMP_TASK: case GIMPLE_OMP_TARGET: case GIMPLE_OMP_TEAMS: + case GIMPLE_OMP_GRID_BODY: data->cannot_fallthru = false; lower_omp_directive (gsi, data); data->cannot_fallthru = false; diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index b26d7a7..e27214f 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1187,6 +1187,9 @@ dump_gimple_omp_for (pretty_printer *buffer, gomp_for *gs, int spc, int flags) case GF_OMP_FOR_KIND_CILKSIMD: pp_string (buffer, "#pragma simd"); break; + case GF_OMP_FOR_KIND_GRID_LOOP: + pp_string (buffer, "#pragma omp for grid_loop"); + break; default: gcc_unreachable (); } @@ -1494,6 +1497,9 @@ dump_gimple_omp_block (pretty_printer *buffer, gimple *gs, int spc, int flags) case GIMPLE_OMP_SECTION: pp_string (buffer, "#pragma omp section"); break; + case GIMPLE_OMP_GRID_BODY: + pp_string (buffer, "#pragma omp gridified body"); + break; default: gcc_unreachable (); } @@ -2301,6 +2307,7 @@ pp_gimple_stmt_1 (pretty_printer *buffer, gimple *gs, int spc, int flags) case GIMPLE_OMP_MASTER: case GIMPLE_OMP_TASKGROUP: case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_GRID_BODY: dump_gimple_omp_block (buffer, gs, spc, flags); break; diff --git a/gcc/gimple-walk.c b/gcc/gimple-walk.c index 6c0a46d..15cd842 100644 --- a/gcc/gimple-walk.c +++ b/gcc/gimple-walk.c @@ -655,6 +655,7 @@ walk_gimple_stmt (gimple_stmt_iterator *gsi, walk_stmt_fn callback_stmt, case GIMPLE_OMP_SINGLE: case GIMPLE_OMP_TARGET: case GIMPLE_OMP_TEAMS: + case GIMPLE_OMP_GRID_BODY: ret = walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), callback_stmt, callback_op, wi); if (ret) diff --git a/gcc/gimple.c b/gcc/gimple.c index 0f4ed88..850e546 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -954,6 +954,19 @@ gimple_build_omp_master (gimple_seq body) return p; } +/* Build a GIMPLE_OMP_GRID_BODY statement. + + BODY is the sequence of statements to be executed by the kernel. 
*/ + +gimple * +gimple_build_omp_grid_body (gimple_seq body) +{ + gimple *p = gimple_alloc (GIMPLE_OMP_GRID_BODY, 0); + if (body) + gimple_omp_set_body (p, body); + + return p; +} /* Build a GIMPLE_OMP_TASKGROUP statement. @@ -1807,6 +1820,7 @@ gimple_copy (gimple *stmt) case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_GRID_BODY: copy_omp_body: new_seq = gimple_seq_copy (gimple_omp_body (stmt)); gimple_omp_set_body (copy, new_seq); diff --git a/gcc/gimple.def b/gcc/gimple.def index db23dee..2ff22b8 100644 --- a/gcc/gimple.def +++ b/gcc/gimple.def @@ -376,6 +376,10 @@ DEFGSCODE(GIMPLE_OMP_TEAMS, "gimple_omp_teams", GSS_OMP_SINGLE_LAYOUT) CLAUSES is an OMP_CLAUSE chain holding the associated clauses. */ DEFGSCODE(GIMPLE_OMP_ORDERED, "gimple_omp_ordered", GSS_OMP_SINGLE_LAYOUT) +/* GIMPLE_OMP_GRID_BODY <BODY> represents a parallel loop lowered for execution + on a GPU. It is an artificial statement created by omp lowering. */ +DEFGSCODE(GIMPLE_OMP_GRID_BODY, "gimple_omp_gpukernel", GSS_OMP) + /* GIMPLE_PREDICT <PREDICT, OUTCOME> specifies a hint for branch prediction. PREDICT is one of the predictors from predict.def. diff --git a/gcc/gimple.h b/gcc/gimple.h index b30a9b8..6d15dab 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -146,6 +146,7 @@ enum gf_mask { GF_CALL_CTRL_ALTERING = 1 << 7, GF_CALL_WITH_BOUNDS = 1 << 8, GF_OMP_PARALLEL_COMBINED = 1 << 0, + GF_OMP_PARALLEL_GRID_PHONY = 1 << 1, GF_OMP_TASK_TASKLOOP = 1 << 0, GF_OMP_FOR_KIND_MASK = (1 << 4) - 1, GF_OMP_FOR_KIND_FOR = 0, @@ -153,12 +154,14 @@ enum gf_mask { GF_OMP_FOR_KIND_TASKLOOP = 2, GF_OMP_FOR_KIND_CILKFOR = 3, GF_OMP_FOR_KIND_OACC_LOOP = 4, + GF_OMP_FOR_KIND_GRID_LOOP = 5, /* Flag for SIMD variants of OMP_FOR kinds. */ GF_OMP_FOR_SIMD = 1 << 3, GF_OMP_FOR_KIND_SIMD = GF_OMP_FOR_SIMD | 0, GF_OMP_FOR_KIND_CILKSIMD = GF_OMP_FOR_SIMD | 1, GF_OMP_FOR_COMBINED = 1 << 4, GF_OMP_FOR_COMBINED_INTO = 1 << 5, + GF_OMP_FOR_GRID_PHONY = 1 << 6, GF_OMP_TARGET_KIND_MASK = (1 << 4) - 1, GF_OMP_TARGET_KIND_REGION = 0, GF_OMP_TARGET_KIND_DATA = 1, @@ -172,6 +175,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 9, GF_OMP_TARGET_KIND_OACC_DECLARE = 10, GF_OMP_TARGET_KIND_OACC_HOST_DATA = 11, + GF_OMP_TEAMS_GRID_PHONY = 1 << 0, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. The exact barrier @@ -733,7 +737,7 @@ struct GTY((tag("GSS_OMP_SINGLE_LAYOUT"))) { /* [ WORD 1-7 ] : base class */ - /* [ WORD 7 ] */ + /* [ WORD 8 ] */ tree clauses; }; @@ -1454,6 +1458,7 @@ gomp_task *gimple_build_omp_task (gimple_seq, tree, tree, tree, tree, tree, tree); gimple *gimple_build_omp_section (gimple_seq); gimple *gimple_build_omp_master (gimple_seq); +gimple *gimple_build_omp_grid_body (gimple_seq); gimple *gimple_build_omp_taskgroup (gimple_seq); gomp_continue *gimple_build_omp_continue (tree, tree); gomp_ordered *gimple_build_omp_ordered (gimple_seq, tree); @@ -1714,6 +1719,7 @@ gimple_has_substatements (gimple *g) case GIMPLE_OMP_CRITICAL: case GIMPLE_WITH_CLEANUP_EXPR: case GIMPLE_TRANSACTION: + case GIMPLE_OMP_GRID_BODY: return true; default: @@ -5079,6 +5085,24 @@ gimple_omp_for_set_pre_body (gimple *gs, gimple_seq pre_body) omp_for_stmt->pre_body = pre_body; } +/* Return the kernel_phony of OMP_FOR statement. */ + +static inline bool +gimple_omp_for_grid_phony (const gomp_for *omp_for) +{ + return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_PHONY) != 0; +} + +/* Set kernel_phony flag of OMP_FOR to VALUE. 
*/ + +static inline void +gimple_omp_for_set_grid_phony (gomp_for *omp_for, bool value) +{ + if (value) + omp_for->subcode |= GF_OMP_FOR_GRID_PHONY; + else + omp_for->subcode &= ~GF_OMP_FOR_GRID_PHONY; +} /* Return the clauses associated with OMP_PARALLEL GS. */ @@ -5165,6 +5189,24 @@ gimple_omp_parallel_set_data_arg (gomp_parallel *omp_parallel_stmt, omp_parallel_stmt->data_arg = data_arg; } +/* Return the kernel_phony flag of OMP_PARALLEL_STMT. */ + +static inline bool +gimple_omp_parallel_grid_phony (const gomp_parallel *stmt) +{ + return (gimple_omp_subcode (stmt) & GF_OMP_PARALLEL_GRID_PHONY) != 0; +} + +/* Set kernel_phony flag of OMP_PARALLEL_STMT to VALUE. */ + +static inline void +gimple_omp_parallel_set_grid_phony (gomp_parallel *stmt, bool value) +{ + if (value) + stmt->subcode |= GF_OMP_PARALLEL_GRID_PHONY; + else + stmt->subcode &= ~GF_OMP_PARALLEL_GRID_PHONY; +} /* Return the clauses associated with OMP_TASK GS. */ @@ -5638,6 +5680,24 @@ gimple_omp_teams_set_clauses (gomp_teams *omp_teams_stmt, tree clauses) omp_teams_stmt->clauses = clauses; } +/* Return the kernel_phony flag of an OMP_TEAMS_STMT. */ + +static inline bool +gimple_omp_teams_grid_phony (const gomp_teams *omp_teams_stmt) +{ + return (gimple_omp_subcode (omp_teams_stmt) & GF_OMP_TEAMS_GRID_PHONY) != 0; +} + +/* Set kernel_phony flag of an OMP_TEAMS_STMT to VALUE. */ + +static inline void +gimple_omp_teams_set_grid_phony (gomp_teams *omp_teams_stmt, bool value) +{ + if (value) + omp_teams_stmt->subcode |= GF_OMP_TEAMS_GRID_PHONY; + else + omp_teams_stmt->subcode &= ~GF_OMP_TEAMS_GRID_PHONY; +} /* Return the clauses associated with OMP_SECTIONS GS. */ @@ -6002,7 +6062,8 @@ gimple_return_set_retbnd (gimple *gs, tree retval) case GIMPLE_OMP_RETURN: \ case GIMPLE_OMP_ATOMIC_LOAD: \ case GIMPLE_OMP_ATOMIC_STORE: \ - case GIMPLE_OMP_CONTINUE + case GIMPLE_OMP_CONTINUE: \ + case GIMPLE_OMP_GRID_BODY static inline bool is_gimple_omp (const gimple *stmt) diff --git a/gcc/hsa-brig-format.h b/gcc/hsa-brig-format.h new file mode 100644 index 0000000..e1c6cd2 --- /dev/null +++ b/gcc/hsa-brig-format.h @@ -0,0 +1,1234 @@ +/* HSA BRIG (binary representation of HSAIL) 1.0.1 representation description. + Copyright (C) 2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. + +The contents of the file were created by extracting data structures, enums, +typedefs and other definitions from HSA Programmer's Reference Manual Version +1.0.1 (http://www.hsafoundation.com/standards/).
+ +HTML version is provided on the following link: +http://www.hsafoundation.com/html/Content/PRM/Topics/PRM_title_page.htm */ + +#ifndef HSA_BRIG_FORMAT_H +#define HSA_BRIG_FORMAT_H + +struct BrigModuleHeader; +typedef uint16_t BrigKind16_t; +typedef uint32_t BrigVersion32_t; + +typedef BrigModuleHeader *BrigModule_t; +typedef uint32_t BrigDataOffset32_t; +typedef uint32_t BrigCodeOffset32_t; +typedef uint32_t BrigOperandOffset32_t; +typedef BrigDataOffset32_t BrigDataOffsetString32_t; +typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; +typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; +typedef uint8_t BrigAlignment8_t; + +enum BrigAlignment +{ + BRIG_ALIGNMENT_NONE = 0, + BRIG_ALIGNMENT_1 = 1, + BRIG_ALIGNMENT_2 = 2, + BRIG_ALIGNMENT_4 = 3, + BRIG_ALIGNMENT_8 = 4, + BRIG_ALIGNMENT_16 = 5, + BRIG_ALIGNMENT_32 = 6, + BRIG_ALIGNMENT_64 = 7, + BRIG_ALIGNMENT_128 = 8, + BRIG_ALIGNMENT_256 = 9 +}; + +typedef uint8_t BrigAllocation8_t; + +enum BrigAllocation +{ + BRIG_ALLOCATION_NONE = 0, + BRIG_ALLOCATION_PROGRAM = 1, + BRIG_ALLOCATION_AGENT = 2, + BRIG_ALLOCATION_AUTOMATIC = 3 +}; + +typedef uint8_t BrigAluModifier8_t; + +enum BrigAluModifierMask +{ + BRIG_ALU_FTZ = 1 +}; + +typedef uint8_t BrigAtomicOperation8_t; + +enum BrigAtomicOperation +{ + BRIG_ATOMIC_ADD = 0, + BRIG_ATOMIC_AND = 1, + BRIG_ATOMIC_CAS = 2, + BRIG_ATOMIC_EXCH = 3, + BRIG_ATOMIC_LD = 4, + BRIG_ATOMIC_MAX = 5, + BRIG_ATOMIC_MIN = 6, + BRIG_ATOMIC_OR = 7, + BRIG_ATOMIC_ST = 8, + BRIG_ATOMIC_SUB = 9, + BRIG_ATOMIC_WRAPDEC = 10, + BRIG_ATOMIC_WRAPINC = 11, + BRIG_ATOMIC_XOR = 12, + BRIG_ATOMIC_WAIT_EQ = 13, + BRIG_ATOMIC_WAIT_NE = 14, + BRIG_ATOMIC_WAIT_LT = 15, + BRIG_ATOMIC_WAIT_GTE = 16, + BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, + BRIG_ATOMIC_WAITTIMEOUT_NE = 18, + BRIG_ATOMIC_WAITTIMEOUT_LT = 19, + BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 +}; + +struct BrigBase +{ + uint16_t byteCount; + BrigKind16_t kind; +}; + +typedef uint8_t BrigCompareOperation8_t; + +enum BrigCompareOperation +{ + BRIG_COMPARE_EQ = 0, + BRIG_COMPARE_NE = 1, + BRIG_COMPARE_LT = 2, + BRIG_COMPARE_LE = 3, + BRIG_COMPARE_GT = 4, + BRIG_COMPARE_GE = 5, + BRIG_COMPARE_EQU = 6, + BRIG_COMPARE_NEU = 7, + BRIG_COMPARE_LTU = 8, + BRIG_COMPARE_LEU = 9, + BRIG_COMPARE_GTU = 10, + BRIG_COMPARE_GEU = 11, + BRIG_COMPARE_NUM = 12, + BRIG_COMPARE_NAN = 13, + BRIG_COMPARE_SEQ = 14, + BRIG_COMPARE_SNE = 15, + BRIG_COMPARE_SLT = 16, + BRIG_COMPARE_SLE = 17, + BRIG_COMPARE_SGT = 18, + BRIG_COMPARE_SGE = 19, + BRIG_COMPARE_SGEU = 20, + BRIG_COMPARE_SEQU = 21, + BRIG_COMPARE_SNEU = 22, + BRIG_COMPARE_SLTU = 23, + BRIG_COMPARE_SLEU = 24, + BRIG_COMPARE_SNUM = 25, + BRIG_COMPARE_SNAN = 26, + BRIG_COMPARE_SGTU = 27 +}; + +typedef uint16_t BrigControlDirective16_t; + +enum BrigControlDirective +{ + BRIG_CONTROL_NONE = 0, + BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, + BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, + BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, + BRIG_CONTROL_MAXFLATGRIDSIZE = 4, + BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, + BRIG_CONTROL_REQUIREDDIM = 6, + BRIG_CONTROL_REQUIREDGRIDSIZE = 7, + BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, + BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 +}; + +typedef uint32_t BrigExceptions32_t; + +enum BrigExceptionsMask +{ + BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, + BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, + BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, + BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, + BRIG_EXCEPTIONS_INEXACT = 1 << 4, + BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 +}; + +typedef uint8_t BrigExecutableModifier8_t; + +enum BrigExecutableModifierMask +{ + 
BRIG_EXECUTABLE_DEFINITION = 1 +}; + +typedef uint8_t BrigImageChannelOrder8_t; + +enum BrigImageChannelOrder +{ + BRIG_CHANNEL_ORDER_A = 0, + BRIG_CHANNEL_ORDER_R = 1, + BRIG_CHANNEL_ORDER_RX = 2, + BRIG_CHANNEL_ORDER_RG = 3, + BRIG_CHANNEL_ORDER_RGX = 4, + BRIG_CHANNEL_ORDER_RA = 5, + BRIG_CHANNEL_ORDER_RGB = 6, + BRIG_CHANNEL_ORDER_RGBX = 7, + BRIG_CHANNEL_ORDER_RGBA = 8, + BRIG_CHANNEL_ORDER_BGRA = 9, + BRIG_CHANNEL_ORDER_ARGB = 10, + BRIG_CHANNEL_ORDER_ABGR = 11, + BRIG_CHANNEL_ORDER_SRGB = 12, + BRIG_CHANNEL_ORDER_SRGBX = 13, + BRIG_CHANNEL_ORDER_SRGBA = 14, + BRIG_CHANNEL_ORDER_SBGRA = 15, + BRIG_CHANNEL_ORDER_INTENSITY = 16, + BRIG_CHANNEL_ORDER_LUMINANCE = 17, + BRIG_CHANNEL_ORDER_DEPTH = 18, + BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, + BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 +}; + +typedef uint8_t BrigImageChannelType8_t; + +enum BrigImageChannelType +{ + BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, + BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, + BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, + BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, + BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, + BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, + BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, + BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, + BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, + BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, + BRIG_CHANNEL_TYPE_FLOAT = 15, + BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 +}; + +typedef uint8_t BrigImageGeometry8_t; + +enum BrigImageGeometry +{ + BRIG_GEOMETRY_1D = 0, + BRIG_GEOMETRY_2D = 1, + BRIG_GEOMETRY_3D = 2, + BRIG_GEOMETRY_1DA = 3, + BRIG_GEOMETRY_2DA = 4, + BRIG_GEOMETRY_1DB = 5, + BRIG_GEOMETRY_2DDEPTH = 6, + BRIG_GEOMETRY_2DADEPTH = 7, + BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 +}; + +typedef uint8_t BrigImageQuery8_t; + +enum BrigImageQuery +{ + BRIG_IMAGE_QUERY_WIDTH = 0, + BRIG_IMAGE_QUERY_HEIGHT = 1, + BRIG_IMAGE_QUERY_DEPTH = 2, + BRIG_IMAGE_QUERY_ARRAY = 3, + BRIG_IMAGE_QUERY_CHANNELORDER = 4, + BRIG_IMAGE_QUERY_CHANNELTYPE = 5 +}; + +enum BrigKind +{ + BRIG_KIND_NONE = 0x0000, + BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, + BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, + BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, + BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, + BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, + BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, + BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, + BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, + BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, + BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, + BRIG_KIND_DIRECTIVE_LABEL = 0x1009, + BRIG_KIND_DIRECTIVE_LOC = 0x100a, + BRIG_KIND_DIRECTIVE_MODULE = 0x100b, + BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, + BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, + BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, + BRIG_KIND_DIRECTIVE_END = 0x100f, + BRIG_KIND_INST_BEGIN = 0x2000, + BRIG_KIND_INST_ADDR = 0x2000, + BRIG_KIND_INST_ATOMIC = 0x2001, + BRIG_KIND_INST_BASIC = 0x2002, + BRIG_KIND_INST_BR = 0x2003, + BRIG_KIND_INST_CMP = 0x2004, + BRIG_KIND_INST_CVT = 0x2005, + BRIG_KIND_INST_IMAGE = 0x2006, + BRIG_KIND_INST_LANE = 0x2007, + BRIG_KIND_INST_MEM = 0x2008, + BRIG_KIND_INST_MEM_FENCE = 0x2009, + BRIG_KIND_INST_MOD = 0x200a, + BRIG_KIND_INST_QUERY_IMAGE = 0x200b, + BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, + BRIG_KIND_INST_QUEUE = 0x200d, + BRIG_KIND_INST_SEG = 0x200e, + BRIG_KIND_INST_SEG_CVT = 0x200f, + BRIG_KIND_INST_SIGNAL = 0x2010, + BRIG_KIND_INST_SOURCE_TYPE = 0x2011, + BRIG_KIND_INST_END = 0x2012, + 
BRIG_KIND_OPERAND_BEGIN = 0x3000, + BRIG_KIND_OPERAND_ADDRESS = 0x3000, + BRIG_KIND_OPERAND_ALIGN = 0x3001, + BRIG_KIND_OPERAND_CODE_LIST = 0x3002, + BRIG_KIND_OPERAND_CODE_REF = 0x3003, + BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, + BRIG_KIND_OPERAND_RESERVED = 0x3005, + BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, + BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, + BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, + BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, + BRIG_KIND_OPERAND_REGISTER = 0x300a, + BRIG_KIND_OPERAND_STRING = 0x300b, + BRIG_KIND_OPERAND_WAVESIZE = 0x300c, + BRIG_KIND_OPERAND_END = 0x300d +}; + +typedef uint8_t BrigLinkage8_t; + +enum BrigLinkage +{ + BRIG_LINKAGE_NONE = 0, + BRIG_LINKAGE_PROGRAM = 1, + BRIG_LINKAGE_MODULE = 2, + BRIG_LINKAGE_FUNCTION = 3, + BRIG_LINKAGE_ARG = 4 +}; + +typedef uint8_t BrigMachineModel8_t; + +enum BrigMachineModel +{ + BRIG_MACHINE_SMALL = 0, + BRIG_MACHINE_LARGE = 1 +}; + +typedef uint8_t BrigMemoryModifier8_t; + +enum BrigMemoryModifierMask +{ + BRIG_MEMORY_CONST = 1 +}; + +typedef uint8_t BrigMemoryOrder8_t; + +enum BrigMemoryOrder +{ + BRIG_MEMORY_ORDER_NONE = 0, + BRIG_MEMORY_ORDER_RELAXED = 1, + BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, + BRIG_MEMORY_ORDER_SC_RELEASE = 3, + BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4 +}; + +typedef uint8_t BrigMemoryScope8_t; + +enum BrigMemoryScope +{ + BRIG_MEMORY_SCOPE_NONE = 0, + BRIG_MEMORY_SCOPE_WORKITEM = 1, + BRIG_MEMORY_SCOPE_WAVEFRONT = 2, + BRIG_MEMORY_SCOPE_WORKGROUP = 3, + BRIG_MEMORY_SCOPE_AGENT = 4, + BRIG_MEMORY_SCOPE_SYSTEM = 5 +}; + +struct BrigModuleHeader +{ + char identification[8]; + BrigVersion32_t brigMajor; + BrigVersion32_t brigMinor; + uint64_t byteCount; + uint8_t hash[64]; + uint32_t reserved; + uint32_t sectionCount; + uint64_t sectionIndex; +}; + +typedef uint16_t BrigOpcode16_t; + +enum BrigOpcode +{ + BRIG_OPCODE_NOP = 0, + BRIG_OPCODE_ABS = 1, + BRIG_OPCODE_ADD = 2, + BRIG_OPCODE_BORROW = 3, + BRIG_OPCODE_CARRY = 4, + BRIG_OPCODE_CEIL = 5, + BRIG_OPCODE_COPYSIGN = 6, + BRIG_OPCODE_DIV = 7, + BRIG_OPCODE_FLOOR = 8, + BRIG_OPCODE_FMA = 9, + BRIG_OPCODE_FRACT = 10, + BRIG_OPCODE_MAD = 11, + BRIG_OPCODE_MAX = 12, + BRIG_OPCODE_MIN = 13, + BRIG_OPCODE_MUL = 14, + BRIG_OPCODE_MULHI = 15, + BRIG_OPCODE_NEG = 16, + BRIG_OPCODE_REM = 17, + BRIG_OPCODE_RINT = 18, + BRIG_OPCODE_SQRT = 19, + BRIG_OPCODE_SUB = 20, + BRIG_OPCODE_TRUNC = 21, + BRIG_OPCODE_MAD24 = 22, + BRIG_OPCODE_MAD24HI = 23, + BRIG_OPCODE_MUL24 = 24, + BRIG_OPCODE_MUL24HI = 25, + BRIG_OPCODE_SHL = 26, + BRIG_OPCODE_SHR = 27, + BRIG_OPCODE_AND = 28, + BRIG_OPCODE_NOT = 29, + BRIG_OPCODE_OR = 30, + BRIG_OPCODE_POPCOUNT = 31, + BRIG_OPCODE_XOR = 32, + BRIG_OPCODE_BITEXTRACT = 33, + BRIG_OPCODE_BITINSERT = 34, + BRIG_OPCODE_BITMASK = 35, + BRIG_OPCODE_BITREV = 36, + BRIG_OPCODE_BITSELECT = 37, + BRIG_OPCODE_FIRSTBIT = 38, + BRIG_OPCODE_LASTBIT = 39, + BRIG_OPCODE_COMBINE = 40, + BRIG_OPCODE_EXPAND = 41, + BRIG_OPCODE_LDA = 42, + BRIG_OPCODE_MOV = 43, + BRIG_OPCODE_SHUFFLE = 44, + BRIG_OPCODE_UNPACKHI = 45, + BRIG_OPCODE_UNPACKLO = 46, + BRIG_OPCODE_PACK = 47, + BRIG_OPCODE_UNPACK = 48, + BRIG_OPCODE_CMOV = 49, + BRIG_OPCODE_CLASS = 50, + BRIG_OPCODE_NCOS = 51, + BRIG_OPCODE_NEXP2 = 52, + BRIG_OPCODE_NFMA = 53, + BRIG_OPCODE_NLOG2 = 54, + BRIG_OPCODE_NRCP = 55, + BRIG_OPCODE_NRSQRT = 56, + BRIG_OPCODE_NSIN = 57, + BRIG_OPCODE_NSQRT = 58, + BRIG_OPCODE_BITALIGN = 59, + BRIG_OPCODE_BYTEALIGN = 60, + BRIG_OPCODE_PACKCVT = 61, + BRIG_OPCODE_UNPACKCVT = 62, + BRIG_OPCODE_LERP = 63, + BRIG_OPCODE_SAD = 64, + BRIG_OPCODE_SADHI = 65, + 
BRIG_OPCODE_SEGMENTP = 66, + BRIG_OPCODE_FTOS = 67, + BRIG_OPCODE_STOF = 68, + BRIG_OPCODE_CMP = 69, + BRIG_OPCODE_CVT = 70, + BRIG_OPCODE_LD = 71, + BRIG_OPCODE_ST = 72, + BRIG_OPCODE_ATOMIC = 73, + BRIG_OPCODE_ATOMICNORET = 74, + BRIG_OPCODE_SIGNAL = 75, + BRIG_OPCODE_SIGNALNORET = 76, + BRIG_OPCODE_MEMFENCE = 77, + BRIG_OPCODE_RDIMAGE = 78, + BRIG_OPCODE_LDIMAGE = 79, + BRIG_OPCODE_STIMAGE = 80, + BRIG_OPCODE_IMAGEFENCE = 81, + BRIG_OPCODE_QUERYIMAGE = 82, + BRIG_OPCODE_QUERYSAMPLER = 83, + BRIG_OPCODE_CBR = 84, + BRIG_OPCODE_BR = 85, + BRIG_OPCODE_SBR = 86, + BRIG_OPCODE_BARRIER = 87, + BRIG_OPCODE_WAVEBARRIER = 88, + BRIG_OPCODE_ARRIVEFBAR = 89, + BRIG_OPCODE_INITFBAR = 90, + BRIG_OPCODE_JOINFBAR = 91, + BRIG_OPCODE_LEAVEFBAR = 92, + BRIG_OPCODE_RELEASEFBAR = 93, + BRIG_OPCODE_WAITFBAR = 94, + BRIG_OPCODE_LDF = 95, + BRIG_OPCODE_ACTIVELANECOUNT = 96, + BRIG_OPCODE_ACTIVELANEID = 97, + BRIG_OPCODE_ACTIVELANEMASK = 98, + BRIG_OPCODE_ACTIVELANEPERMUTE = 99, + BRIG_OPCODE_CALL = 100, + BRIG_OPCODE_SCALL = 101, + BRIG_OPCODE_ICALL = 102, + BRIG_OPCODE_RET = 103, + BRIG_OPCODE_ALLOCA = 104, + BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, + BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, + BRIG_OPCODE_DIM = 107, + BRIG_OPCODE_GRIDGROUPS = 108, + BRIG_OPCODE_GRIDSIZE = 109, + BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, + BRIG_OPCODE_PACKETID = 111, + BRIG_OPCODE_WORKGROUPID = 112, + BRIG_OPCODE_WORKGROUPSIZE = 113, + BRIG_OPCODE_WORKITEMABSID = 114, + BRIG_OPCODE_WORKITEMFLATABSID = 115, + BRIG_OPCODE_WORKITEMFLATID = 116, + BRIG_OPCODE_WORKITEMID = 117, + BRIG_OPCODE_CLEARDETECTEXCEPT = 118, + BRIG_OPCODE_GETDETECTEXCEPT = 119, + BRIG_OPCODE_SETDETECTEXCEPT = 120, + BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, + BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, + BRIG_OPCODE_LDQUEUEREADINDEX = 123, + BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, + BRIG_OPCODE_STQUEUEREADINDEX = 125, + BRIG_OPCODE_STQUEUEWRITEINDEX = 126, + BRIG_OPCODE_CLOCK = 127, + BRIG_OPCODE_CUID = 128, + BRIG_OPCODE_DEBUGTRAP = 129, + BRIG_OPCODE_GROUPBASEPTR = 130, + BRIG_OPCODE_KERNARGBASEPTR = 131, + BRIG_OPCODE_LANEID = 132, + BRIG_OPCODE_MAXCUID = 133, + BRIG_OPCODE_MAXWAVEID = 134, + BRIG_OPCODE_NULLPTR = 135, + BRIG_OPCODE_WAVEID = 136, + BRIG_OPCODE_FIRST_USER_DEFINED = 32768 +}; + +typedef uint8_t BrigPack8_t; + +enum BrigPack +{ + BRIG_PACK_NONE = 0, + BRIG_PACK_PP = 1, + BRIG_PACK_PS = 2, + BRIG_PACK_SP = 3, + BRIG_PACK_SS = 4, + BRIG_PACK_S = 5, + BRIG_PACK_P = 6, + BRIG_PACK_PPSAT = 7, + BRIG_PACK_PSSAT = 8, + BRIG_PACK_SPSAT = 9, + BRIG_PACK_SSSAT = 10, + BRIG_PACK_SSAT = 11, + BRIG_PACK_PSAT = 12 +}; + +typedef uint8_t BrigProfile8_t; + +enum BrigProfile +{ + BRIG_PROFILE_BASE = 0, + BRIG_PROFILE_FULL = 1 +}; + +typedef uint16_t BrigRegisterKind16_t; + +enum BrigRegisterKind +{ + BRIG_REGISTER_KIND_CONTROL = 0, + BRIG_REGISTER_KIND_SINGLE = 1, + BRIG_REGISTER_KIND_DOUBLE = 2, + BRIG_REGISTER_KIND_QUAD = 3 +}; + +typedef uint8_t BrigRound8_t; + +enum BrigRound +{ + BRIG_ROUND_NONE = 0, + BRIG_ROUND_FLOAT_DEFAULT = 1, + BRIG_ROUND_FLOAT_NEAR_EVEN = 2, + BRIG_ROUND_FLOAT_ZERO = 3, + BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, + BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, + BRIG_ROUND_INTEGER_NEAR_EVEN = 6, + BRIG_ROUND_INTEGER_ZERO = 7, + BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, + BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, + BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, + BRIG_ROUND_INTEGER_ZERO_SAT = 11, + BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, + BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, + BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, 
+ BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, + BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 +}; + +typedef uint8_t BrigSamplerAddressing8_t; + +enum BrigSamplerAddressing +{ + BRIG_ADDRESSING_UNDEFINED = 0, + BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, + BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, + BRIG_ADDRESSING_REPEAT = 3, + BRIG_ADDRESSING_MIRRORED_REPEAT = 4, + BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 +}; + +typedef uint8_t BrigSamplerCoordNormalization8_t; + +enum BrigSamplerCoordNormalization +{ + BRIG_COORD_UNNORMALIZED = 0, + BRIG_COORD_NORMALIZED = 1 +}; + +typedef uint8_t BrigSamplerFilter8_t; + +enum BrigSamplerFilter +{ + BRIG_FILTER_NEAREST = 0, + BRIG_FILTER_LINEAR = 1, + BRIG_FILTER_FIRST_USER_DEFINED = 128 +}; + +typedef uint8_t BrigSamplerQuery8_t; + +enum BrigSamplerQuery +{ + BRIG_SAMPLER_QUERY_ADDRESSING = 0, + BRIG_SAMPLER_QUERY_COORD = 1, + BRIG_SAMPLER_QUERY_FILTER = 2 +}; + +typedef uint32_t BrigSectionIndex32_t; + +enum BrigSectionIndex +{ + BRIG_SECTION_INDEX_DATA = 0, + BRIG_SECTION_INDEX_CODE = 1, + BRIG_SECTION_INDEX_OPERAND = 2, + BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3 +}; + +struct BrigSectionHeader +{ + uint64_t byteCount; + uint32_t headerByteCount; + uint32_t nameLength; + uint8_t name[1]; +}; + +typedef uint8_t BrigSegCvtModifier8_t; + +enum BrigSegCvtModifierMask +{ + BRIG_SEG_CVT_NONULL = 1 +}; + +typedef uint8_t BrigSegment8_t; + +enum BrigSegment +{ + BRIG_SEGMENT_NONE = 0, + BRIG_SEGMENT_FLAT = 1, + BRIG_SEGMENT_GLOBAL = 2, + BRIG_SEGMENT_READONLY = 3, + BRIG_SEGMENT_KERNARG = 4, + BRIG_SEGMENT_GROUP = 5, + BRIG_SEGMENT_PRIVATE = 6, + BRIG_SEGMENT_SPILL = 7, + BRIG_SEGMENT_ARG = 8, + BRIG_SEGMENT_FIRST_USER_DEFINED = 128 +}; + +enum +{ + BRIG_TYPE_BASE_SIZE = 5, + BRIG_TYPE_PACK_SIZE = 2, + BRIG_TYPE_ARRAY_SIZE = 1, + + BRIG_TYPE_BASE_SHIFT = 0, + BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, + BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, + + BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) + << BRIG_TYPE_BASE_SHIFT, + BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) + << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) + << BRIG_TYPE_ARRAY_SHIFT, + + BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, + + BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT +}; + +typedef uint16_t BrigType16_t; + +enum BrigType +{ + BRIG_TYPE_NONE = 0, + + BRIG_TYPE_U8 = 1, + BRIG_TYPE_U16 = 2, + BRIG_TYPE_U32 = 3, + BRIG_TYPE_U64 = 4, + + BRIG_TYPE_S8 = 5, + BRIG_TYPE_S16 = 6, + BRIG_TYPE_S32 = 7, + BRIG_TYPE_S64 = 8, + + BRIG_TYPE_F16 = 9, + BRIG_TYPE_F32 = 10, + BRIG_TYPE_F64 = 11, + + BRIG_TYPE_B1 = 12, + BRIG_TYPE_B8 = 13, + BRIG_TYPE_B16 = 14, + BRIG_TYPE_B32 = 15, + BRIG_TYPE_B64 = 16, + BRIG_TYPE_B128 = 17, + + BRIG_TYPE_SAMP = 18, + BRIG_TYPE_ROIMG = 19, + BRIG_TYPE_WOIMG = 20, + BRIG_TYPE_RWIMG = 21, + + BRIG_TYPE_SIG32 = 22, + BRIG_TYPE_SIG64 = 23, + + BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, + BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, + BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, + BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, + 
BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, + BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, + BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, + BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, + BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, + BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, + BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, + BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, + BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, + BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, + + BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, + BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, + BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, + BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, + BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, + BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, + BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, + BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, + BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, + BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, + + BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, + BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_F16X4_ARRAY = 
BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, + BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, + BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, + + BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY +}; + +struct BrigUInt64 +{ + uint32_t lo; + uint32_t hi; +}; + +typedef uint8_t BrigVariableModifier8_t; + +enum BrigVariableModifierMask +{ + BRIG_VARIABLE_DEFINITION = 1, + BRIG_VARIABLE_CONST = 2 +}; + +enum BrigVersion +{ + BRIG_VERSION_HSAIL_MAJOR = 1, + BRIG_VERSION_HSAIL_MINOR = 0, + BRIG_VERSION_BRIG_MAJOR = 1, + BRIG_VERSION_BRIG_MINOR = 0 +}; + +typedef uint8_t BrigWidth8_t; + +enum BrigWidth +{ + BRIG_WIDTH_NONE = 0, + BRIG_WIDTH_1 = 1, + BRIG_WIDTH_2 = 2, + BRIG_WIDTH_4 = 3, + BRIG_WIDTH_8 = 4, + BRIG_WIDTH_16 = 5, + BRIG_WIDTH_32 = 6, + BRIG_WIDTH_64 = 7, + BRIG_WIDTH_128 = 8, + BRIG_WIDTH_256 = 9, + BRIG_WIDTH_512 = 10, + BRIG_WIDTH_1024 = 11, + BRIG_WIDTH_2048 = 12, + BRIG_WIDTH_4096 = 13, + BRIG_WIDTH_8192 = 14, + BRIG_WIDTH_16384 = 15, + BRIG_WIDTH_32768 = 16, + BRIG_WIDTH_65536 = 17, + BRIG_WIDTH_131072 = 18, + BRIG_WIDTH_262144 = 19, + BRIG_WIDTH_524288 = 20, + BRIG_WIDTH_1048576 = 21, + BRIG_WIDTH_2097152 = 22, + BRIG_WIDTH_4194304 = 23, + BRIG_WIDTH_8388608 = 24, + BRIG_WIDTH_16777216 = 25, + BRIG_WIDTH_33554432 = 26, + BRIG_WIDTH_67108864 = 27, + BRIG_WIDTH_134217728 = 28, + BRIG_WIDTH_268435456 = 29, + BRIG_WIDTH_536870912 = 30, + BRIG_WIDTH_1073741824 = 31, + BRIG_WIDTH_2147483648 = 32, + BRIG_WIDTH_WAVESIZE = 33, + BRIG_WIDTH_ALL = 34 +}; + +struct BrigData +{ + uint32_t byteCount; + uint8_t bytes[1]; +}; + +struct BrigDirectiveArgBlock +{ + BrigBase base; +}; + +struct BrigDirectiveComment +{ + BrigBase base; + BrigDataOffsetString32_t name; +}; + +struct BrigDirectiveControl +{ + BrigBase base; + BrigControlDirective16_t control; + uint16_t reserved; + BrigDataOffsetOperandList32_t operands; +}; + +struct BrigDirectiveExecutable +{ + BrigBase base; + BrigDataOffsetString32_t name; + uint16_t outArgCount; + uint16_t inArgCount; + BrigCodeOffset32_t firstInArg; + BrigCodeOffset32_t firstCodeBlockEntry; + BrigCodeOffset32_t nextModuleEntry; + BrigExecutableModifier8_t modifier; + BrigLinkage8_t linkage; + uint16_t reserved; +}; + +struct BrigDirectiveExtension +{ + BrigBase base; + BrigDataOffsetString32_t name; +}; + +struct BrigDirectiveFbarrier +{ + BrigBase base; + BrigDataOffsetString32_t name; + BrigVariableModifier8_t modifier; + BrigLinkage8_t linkage; + uint16_t reserved; +}; + +struct BrigDirectiveLabel +{ + BrigBase base; + BrigDataOffsetString32_t name; +}; + +struct BrigDirectiveLoc +{ + BrigBase base; + BrigDataOffsetString32_t filename; + uint32_t line; + uint32_t column; +}; + +struct BrigDirectiveModule +{ + BrigBase base; + BrigDataOffsetString32_t name; + BrigVersion32_t hsailMajor; + BrigVersion32_t hsailMinor; + BrigProfile8_t profile; + BrigMachineModel8_t machineModel; + BrigRound8_t defaultFloatRound; + uint8_t reserved; +}; + +struct BrigDirectiveNone +{ + BrigBase base; +}; + +struct BrigDirectivePragma +{ + BrigBase base; + BrigDataOffsetOperandList32_t operands; +}; + +struct BrigDirectiveVariable +{ + BrigBase base; + BrigDataOffsetString32_t name; + BrigOperandOffset32_t init; + BrigType16_t type; + BrigSegment8_t segment; + BrigAlignment8_t align; + BrigUInt64 dim; + BrigVariableModifier8_t modifier; + BrigLinkage8_t linkage; + BrigAllocation8_t allocation; + uint8_t reserved; +}; + +struct BrigInstBase +{ + BrigBase base; + BrigOpcode16_t opcode; + 
BrigType16_t type; + BrigDataOffsetOperandList32_t operands; +}; + +struct BrigInstAddr +{ + BrigInstBase base; + BrigSegment8_t segment; + uint8_t reserved[3]; +}; + +struct BrigInstAtomic +{ + BrigInstBase base; + BrigSegment8_t segment; + BrigMemoryOrder8_t memoryOrder; + BrigMemoryScope8_t memoryScope; + BrigAtomicOperation8_t atomicOperation; + uint8_t equivClass; + uint8_t reserved[3]; +}; + +struct BrigInstBasic +{ + BrigInstBase base; +}; + +struct BrigInstBr +{ + BrigInstBase base; + BrigWidth8_t width; + uint8_t reserved[3]; +}; + +struct BrigInstCmp +{ + BrigInstBase base; + BrigType16_t sourceType; + BrigAluModifier8_t modifier; + BrigCompareOperation8_t compare; + BrigPack8_t pack; + uint8_t reserved[3]; +}; + +struct BrigInstCvt +{ + BrigInstBase base; + BrigType16_t sourceType; + BrigAluModifier8_t modifier; + BrigRound8_t round; +}; + +struct BrigInstImage +{ + BrigInstBase base; + BrigType16_t imageType; + BrigType16_t coordType; + BrigImageGeometry8_t geometry; + uint8_t equivClass; + uint16_t reserved; +}; + +struct BrigInstLane +{ + BrigInstBase base; + BrigType16_t sourceType; + BrigWidth8_t width; + uint8_t reserved; +}; + +struct BrigInstMem +{ + BrigInstBase base; + BrigSegment8_t segment; + BrigAlignment8_t align; + uint8_t equivClass; + BrigWidth8_t width; + BrigMemoryModifier8_t modifier; + uint8_t reserved[3]; +}; + +struct BrigInstMemFence +{ + BrigInstBase base; + BrigMemoryOrder8_t memoryOrder; + BrigMemoryScope8_t globalSegmentMemoryScope; + BrigMemoryScope8_t groupSegmentMemoryScope; + BrigMemoryScope8_t imageSegmentMemoryScope; +}; + +struct BrigInstMod +{ + BrigInstBase base; + BrigAluModifier8_t modifier; + BrigRound8_t round; + BrigPack8_t pack; + uint8_t reserved; +}; + +struct BrigInstQueryImage +{ + BrigInstBase base; + BrigType16_t imageType; + BrigImageGeometry8_t geometry; + BrigImageQuery8_t query; +}; + +struct BrigInstQuerySampler +{ + BrigInstBase base; + BrigSamplerQuery8_t query; + uint8_t reserved[3]; +}; + +struct BrigInstQueue +{ + BrigInstBase base; + BrigSegment8_t segment; + BrigMemoryOrder8_t memoryOrder; + uint16_t reserved; +}; + +struct BrigInstSeg +{ + BrigInstBase base; + BrigSegment8_t segment; + uint8_t reserved[3]; +}; + +struct BrigInstSegCvt +{ + BrigInstBase base; + BrigType16_t sourceType; + BrigSegment8_t segment; + BrigSegCvtModifier8_t modifier; +}; + +struct BrigInstSignal +{ + BrigInstBase base; + BrigType16_t signalType; + BrigMemoryOrder8_t memoryOrder; + BrigAtomicOperation8_t signalOperation; +}; + +struct BrigInstSourceType +{ + BrigInstBase base; + BrigType16_t sourceType; + uint16_t reserved; +}; + +struct BrigOperandAddress +{ + BrigBase base; + BrigCodeOffset32_t symbol; + BrigOperandOffset32_t reg; + BrigUInt64 offset; +}; + +struct BrigOperandAlign +{ + BrigBase base; + BrigAlignment8_t align; + uint8_t reserved[3]; +}; + +struct BrigOperandCodeList +{ + BrigBase base; + BrigDataOffsetCodeList32_t elements; +}; + +struct BrigOperandCodeRef +{ + BrigBase base; + BrigCodeOffset32_t ref; +}; + +struct BrigOperandConstantBytes +{ + BrigBase base; + BrigType16_t type; + uint16_t reserved; + BrigDataOffsetString32_t bytes; +}; + +struct BrigOperandConstantImage +{ + BrigBase base; + BrigType16_t type; + BrigImageGeometry8_t geometry; + BrigImageChannelOrder8_t channelOrder; + BrigImageChannelType8_t channelType; + uint8_t reserved[3]; + BrigUInt64 width; + BrigUInt64 height; + BrigUInt64 depth; + BrigUInt64 array; +}; + +struct BrigOperandConstantOperandList +{ + BrigBase base; + BrigType16_t type; + uint16_t 
reserved; + BrigDataOffsetOperandList32_t elements; +}; + +struct BrigOperandConstantSampler +{ + BrigBase base; + BrigType16_t type; + BrigSamplerCoordNormalization8_t coord; + BrigSamplerFilter8_t filter; + BrigSamplerAddressing8_t addressing; + uint8_t reserved[3]; +}; + +struct BrigOperandOperandList +{ + BrigBase base; + BrigDataOffsetOperandList32_t elements; +}; + +struct BrigOperandRegister +{ + BrigBase base; + BrigRegisterKind16_t regKind; + uint16_t regNum; +}; + +struct BrigOperandString +{ + BrigBase base; + BrigDataOffsetString32_t string; +}; + +struct BrigOperandWavesize +{ + BrigBase base; +}; + +#endif /* HSA_BRIG_FORMAT_H */ diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c new file mode 100644 index 0000000..cfbac58 --- /dev/null +++ b/gcc/hsa-brig.c @@ -0,0 +1,2560 @@ +/* Producing binary form of HSA BRIG from our internal representation. + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "target.h" +#include "tm_p.h" +#include "is-a.h" +#include "vec.h" +#include "hash-table.h" +#include "hash-map.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stor-layout.h" +#include "output.h" +#include "cfg.h" +#include "function.h" +#include "fold-const.h" +#include "stringpool.h" +#include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "cgraph.h" +#include "dumpfile.h" +#include "print-tree.h" +#include "symbol-summary.h" +#include "hsa.h" +#include "gomp-constants.h" + +/* Convert VAL to little endian form, if necessary. */ + +static uint16_t +lendian16 (uint16_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap16 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return val; +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + return (val >> 8) | (val << 8); +#endif +#endif +} + +/* Convert VAL to little endian form, if necessary. */ + +static uint32_t +lendian32 (uint32_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap32 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return (val >> 16) | (val << 16); +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); + return (val >> 16) | (val << 16); +#endif +#endif +} + +/* Convert VAL to little endian form, if necessary. 
*/ + +static uint64_t +lendian64 (uint64_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap64 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return (((val & 0xffffll) << 48) + | ((val & 0xffff0000ll) << 16) + | ((val & 0xffff00000000ll) >> 16) + | ((val & 0xffff000000000000ll) >> 48)); +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + val = (((val & 0xff00ff00ff00ff00ll) >> 8) + | ((val & 0x00ff00ff00ff00ffll) << 8)); + val = ((( val & 0xffff0000ffff0000ll) >> 16) + | (( val & 0x0000ffff0000ffffll) << 16)); + return (val >> 32) | (val << 32); +#endif +#endif +} + +#define BRIG_ELF_SECTION_NAME ".brig" +#define BRIG_LABEL_STRING "hsa_brig" +#define BRIG_SECTION_DATA_NAME "hsa_data" +#define BRIG_SECTION_CODE_NAME "hsa_code" +#define BRIG_SECTION_OPERAND_NAME "hsa_operand" + +#define BRIG_CHUNK_MAX_SIZE (64 * 1024) + +/* Required HSA section alignment. */ + +#define HSA_SECTION_ALIGNMENT 16 + +/* Chunks of BRIG binary data. */ + +struct hsa_brig_data_chunk +{ + /* Size of the data already stored into a chunk. */ + unsigned size; + + /* Pointer to the data. */ + char *data; +}; + +/* Structure representing a BRIG section, holding and writing its data. */ + +class hsa_brig_section +{ +public: + /* Section name that will be output to the BRIG. */ + const char *section_name; + /* Size in bytes of all data stored in the section. */ + unsigned total_size; + /* The size of the header of the section including padding. */ + unsigned header_byte_count; + /* The size of the header of the section without any padding. */ + unsigned header_byte_delta; + + /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ + vec <struct hsa_brig_data_chunk> chunks; + + /* More convenient access to the last chunk from the vector above. */ + struct hsa_brig_data_chunk *cur_chunk; + + void allocate_new_chunk (); + void init (const char *name); + void release (); + void output (); + unsigned add (const void *data, unsigned len); + void round_size_up (int factor); + void *get_ptr_by_offset (unsigned int offset); +}; + +static struct hsa_brig_section brig_data, brig_code, brig_operand; +static uint32_t brig_insn_count; +static bool brig_initialized = false; + +/* Mapping between emitted HSA functions and their offset in code segment. */ +static hash_map<tree, BrigCodeOffset32_t> *function_offsets; + +/* Hash map of emitted function declarations. */ +static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; + +/* Hash table of emitted internal function declaration offsets. */ +hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; + +/* List of sbr instructions. */ +static vec <hsa_insn_sbr *> *switch_instructions; + +struct function_linkage_pair +{ + function_linkage_pair (tree decl, unsigned int off) + : function_decl (decl), offset (off) {} + + /* Declaration of called function. */ + tree function_decl; + + /* Offset in operand section. */ + unsigned int offset; +}; + +/* Vector of function calls where we need to resolve function offsets. */ +static auto_vec <function_linkage_pair> function_call_linkage; + +/* Add a new chunk, allocate data for it and initialize it. 
*/ + +void +hsa_brig_section::allocate_new_chunk () +{ + struct hsa_brig_data_chunk new_chunk; + + new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); + new_chunk.size = 0; + cur_chunk = chunks.safe_push (new_chunk); +} + +/* Initialize the brig section. */ + +void +hsa_brig_section::init (const char *name) +{ + section_name = name; + /* While the following computation is basically wrong, because the intent + certainly wasn't to have the first character of name and padding, which + are a part of sizeof (BrigSectionHeader), included in the first addend, + this is what the disassembler expects. */ + total_size = sizeof (BrigSectionHeader) + strlen (section_name); + chunks.create (1); + allocate_new_chunk (); + header_byte_delta = total_size; + round_size_up (4); + header_byte_count = total_size; +} + +/* Free all data in the section. */ + +void +hsa_brig_section::release () +{ + for (unsigned i = 0; i < chunks.length (); i++) + free (chunks[i].data); + chunks.release (); + cur_chunk = NULL; +} + +/* Write the section to the output file, into an assembler section with the + name given at initialization. Switches the output section and does not + restore it. */ + +void +hsa_brig_section::output () +{ + struct BrigSectionHeader section_header; + char padding[8]; + + section_header.byteCount = lendian64 (total_size); + section_header.headerByteCount = lendian32 (header_byte_count); + section_header.nameLength = lendian32 (strlen (section_name)); + assemble_string ((const char *) &section_header, 16); + assemble_string (section_name, (section_header.nameLength)); + memset (&padding, 0, sizeof (padding)); + /* This is also a consequence of the wrong header size computation described + in a comment in hsa_brig_section::init. */ + assemble_string (padding, 8); + for (unsigned i = 0; i < chunks.length (); i++) + assemble_string (chunks[i].data, chunks[i].size); +} + +/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at + which it was stored. */ + +unsigned +hsa_brig_section::add (const void *data, unsigned len) +{ + unsigned offset = total_size; + + gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); + if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) + allocate_new_chunk (); + + memcpy (cur_chunk->data + cur_chunk->size, data, len); + cur_chunk->size += len; + total_size += len; + + return offset; +} + +/* Add padding to section so that its size is divisible by FACTOR. */ + +void +hsa_brig_section::round_size_up (int factor) +{ + unsigned padding, res = total_size % factor; + + if (res == 0) + return; + + padding = factor - res; + total_size += padding; + if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) + { + padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; + cur_chunk->size = BRIG_CHUNK_MAX_SIZE; + allocate_new_chunk (); + } + + cur_chunk->size += padding; +} + +/* Return pointer to data by global OFFSET in the section. */ + +void * +hsa_brig_section::get_ptr_by_offset (unsigned int offset) +{ + gcc_assert (offset < total_size); + offset -= header_byte_delta; + + unsigned i; + for (i = 0; offset >= chunks[i].size; i++) + offset -= chunks[i].size; + + return chunks[i].data + offset; +} + +/* BRIG string data hashing. */ + +struct brig_string_slot +{ + const char *s; + char prefix; + int len; + uint32_t offset; +}; + +/* Hash table helpers. 
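They allow every emitted string to be interned: two identical strings with + the same prefix character share a single entry in the data section.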
*/ + +struct brig_string_slot_hasher : pointer_hash <brig_string_slot> +{ + static inline hashval_t hash (const value_type); + static inline bool equal (const value_type, const compare_type); + static inline void remove (value_type); +}; + +/* Returns a hash code for DS. Adapted from libiberty's htab_hash_string + to support strings that may not end in '\0'. */ + +inline hashval_t +brig_string_slot_hasher::hash (const value_type ds) +{ + hashval_t r = ds->len; + int i; + + for (i = 0; i < ds->len; i++) + r = r * 67 + (unsigned) ds->s[i] - 113; + r = r * 67 + (unsigned) ds->prefix - 113; + return r; +} + +/* Returns nonzero if DS1 and DS2 are equal. */ + +inline bool +brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) +{ + if (ds1->len == ds2->len) + return ds1->prefix == ds2->prefix + && memcmp (ds1->s, ds2->s, ds1->len) == 0; + + return 0; +} + +/* Deallocate memory for DS upon its removal. */ + +inline void +brig_string_slot_hasher::remove (value_type ds) +{ + free (const_cast<char *> (ds->s)); + free (ds); +} + +/* Hash for strings we output in order not to duplicate them needlessly. */ + +static hash_table<brig_string_slot_hasher> *brig_string_htab; + +/* Emit a null terminated string STR to the data section and return its + offset in it. If PREFIX is non-zero, output it just before STR too. + Sanitize the string if SANITIZE option is set to true. */ + +static unsigned +brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) +{ + unsigned slen = strlen (str); + unsigned offset, len = slen + (prefix ? 1 : 0); + uint32_t hdr_len = lendian32 (len); + brig_string_slot s_slot; + brig_string_slot **slot; + char *str2; + + str2 = xstrdup (str); + + if (sanitize) + hsa_sanitize_name (str2); + s_slot.s = str2; + s_slot.len = slen; + s_slot.prefix = prefix; + s_slot.offset = 0; + + slot = brig_string_htab->find_slot (&s_slot, INSERT); + if (*slot == NULL) + { + brig_string_slot *new_slot = XCNEW (brig_string_slot); + + /* In theory we should fill in BrigData but that would mean copying + the string to a buffer for no reason, so we just emulate it. */ + offset = brig_data.add (&hdr_len, sizeof (hdr_len)); + if (prefix) + brig_data.add (&prefix, 1); + + brig_data.add (str2, slen); + brig_data.round_size_up (4); + + /* TODO: could use the string we just copied into + brig_string->cur_chunk */ + new_slot->s = str2; + new_slot->len = slen; + new_slot->prefix = prefix; + new_slot->offset = offset; + *slot = new_slot; + } + else + { + offset = (*slot)->offset; + free (str2); + } + + return offset; +} + +/* Linked list of queued operands. */ + +static struct operand_queue +{ + /* First from the chain of queued operands. */ + hsa_op_base *first_op, *last_op; + + /* The offset at which the next operand will be enqueued. */ + unsigned projected_size; + +} op_queue; + +/* Unless already initialized, initialize infrastructure to produce BRIG. 
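This creates the three BRIG sections and emits the module directive, whose + name is derived from the main input file name, as the very first entry of + the code section.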
*/ + +static void +brig_init (void) +{ + brig_insn_count = 0; + + if (brig_initialized) + return; + + brig_string_htab = new hash_table<brig_string_slot_hasher> (37); + brig_data.init (BRIG_SECTION_DATA_NAME); + brig_code.init (BRIG_SECTION_CODE_NAME); + brig_operand.init (BRIG_SECTION_OPERAND_NAME); + brig_initialized = true; + + struct BrigDirectiveModule moddir; + memset (&moddir, 0, sizeof (moddir)); + moddir.base.byteCount = lendian16 (sizeof (moddir)); + + char *modname; + if (main_input_filename && *main_input_filename != '\0') + { + const char *part = strrchr (main_input_filename, '/'); + if (!part) + part = main_input_filename; + else + part++; + modname = concat ("&__hsa_module_", part, NULL); + char *extension = strchr (modname, '.'); + if (extension) + *extension = '\0'; + + /* In LTO mode, we have to emit a different module name for each LTRANS + unit. */ + if (flag_ltrans) + { + part = strrchr (asm_file_name, '/'); + if (!part) + part = asm_file_name; + else + part++; + char *modname2; + asprintf (&modname2, "%s_%s", modname, part); + free (modname); + modname = modname2; + } + + hsa_sanitize_name (modname); + moddir.name = brig_emit_string (modname); + free (modname); + } + else + moddir.name = brig_emit_string ("__hsa_module_unnamed", '&'); + moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE); + moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR); + moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR); + moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL : BRIG_PROFILE_BASE; + if (hsa_machine_large_p ()) + moddir.machineModel = BRIG_MACHINE_LARGE; + else + moddir.machineModel = BRIG_MACHINE_SMALL; + moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT; + brig_code.add (&moddir, sizeof (moddir)); +} + +/* Free all BRIG data. */ + +static void +brig_release_data (void) +{ + delete brig_string_htab; + brig_data.release (); + brig_code.release (); + brig_operand.release (); + + brig_initialized = false; +} + +/* Enqueue operation OP. Return the offset at which it will be stored. */ + +static unsigned int +enqueue_op (hsa_op_base *op) +{ + unsigned ret; + + if (op->m_brig_op_offset) + return op->m_brig_op_offset; + + ret = op_queue.projected_size; + op->m_brig_op_offset = op_queue.projected_size; + + if (!op_queue.first_op) + op_queue.first_op = op; + else + op_queue.last_op->m_next = op; + op_queue.last_op = op; + + if (is_a <hsa_op_immed *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); + else if (is_a <hsa_op_reg *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandRegister); + else if (is_a <hsa_op_address *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandAddress); + else if (is_a <hsa_op_code_ref *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandCodeRef); + else if (is_a <hsa_op_code_list *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandCodeList); + else if (is_a <hsa_op_operand_list *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandOperandList); + else + gcc_unreachable (); + return ret; +} + + +/* Emit directive describing a symbol if it has not been emitted already. + Return the offset of the directive. 
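The offset of the first emission is cached in m_directive_offset, so every + symbol is described by at most one directive.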
*/ + +static unsigned +emit_directive_variable (struct hsa_symbol *symbol) +{ + struct BrigDirectiveVariable dirvar; + unsigned name_offset; + static unsigned res_name_offset; + + if (symbol->m_directive_offset) + return symbol->m_directive_offset; + + memset (&dirvar, 0, sizeof (dirvar)); + dirvar.base.byteCount = lendian16 (sizeof (dirvar)); + dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); + dirvar.allocation = symbol->m_allocation; + + char prefix = symbol->m_global_scope_p ? '&' : '%'; + + if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) + { + if (res_name_offset == 0) + res_name_offset = brig_emit_string (symbol->m_name, '%'); + name_offset = res_name_offset; + } + else if (symbol->m_name) + name_offset = brig_emit_string (symbol->m_name, prefix); + else + { + char buf[64]; + snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), + symbol->m_name_number); + name_offset = brig_emit_string (buf, prefix); + } + + dirvar.name = lendian32 (name_offset); + dirvar.init = 0; + dirvar.type = lendian16 (symbol->m_type); + dirvar.segment = symbol->m_segment; + /* TODO: Once we are able to access global variables, we must copy their + alignment. */ + dirvar.align = MAX (hsa_natural_alignment (dirvar.type), + (BrigAlignment8_t) BRIG_ALIGNMENT_4); + dirvar.linkage = symbol->m_linkage; + dirvar.dim.lo = symbol->m_dim; + dirvar.dim.hi = symbol->m_dim >> 32; + + /* Global variables are just declared and linked via HSA runtime. */ + if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) + dirvar.modifier |= BRIG_VARIABLE_DEFINITION; + dirvar.reserved = 0; + + if (symbol->m_cst_value) + { + dirvar.modifier |= BRIG_VARIABLE_CONST; + dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); + } + + symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); + return symbol->m_directive_offset; +} + +/* Emit directives describing either a function declaration or + definition F. */ + +static BrigDirectiveExecutable * +emit_function_directives (hsa_function_representation *f, bool is_declaration) +{ + struct BrigDirectiveExecutable fndir; + unsigned name_offset, inarg_off, scoped_off, next_toplev_off; + int count = 0; + BrigDirectiveExecutable *ptr_to_fndir; + hsa_symbol *sym; + + if (!f->m_declaration_p) + for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++) + { + emit_directive_variable (sym); + brig_insn_count++; + } + + name_offset = brig_emit_string (f->m_name, '&'); + inarg_off = brig_code.total_size + sizeof (fndir) + + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0); + scoped_off = inarg_off + + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable); + + if (!f->m_declaration_p) + { + count += f->m_spill_symbols.length (); + count += f->m_private_variables.length (); + } + + next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable); + + memset (&fndir, 0, sizeof (fndir)); + fndir.base.byteCount = lendian16 (sizeof (fndir)); + fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL + : BRIG_KIND_DIRECTIVE_FUNCTION); + fndir.name = lendian32 (name_offset); + fndir.inArgCount = lendian16 (f->m_input_args.length ()); + fndir.outArgCount = lendian16 (f->m_output_arg ? 
1 : 0); + fndir.firstInArg = lendian32 (inarg_off); + fndir.firstCodeBlockEntry = lendian32 (scoped_off); + fndir.nextModuleEntry = lendian32 (next_toplev_off); + fndir.linkage = f->get_linkage (); + if (!f->m_declaration_p) + fndir.modifier |= BRIG_EXECUTABLE_DEFINITION; + memset (&fndir.reserved, 0, sizeof (fndir.reserved)); + + /* Once we put a definition of function_offsets, we should not overwrite + it with a declaration of the function. */ + if (f->m_internal_fn == NULL) + { + if (!function_offsets->get (f->m_decl) || !is_declaration) + function_offsets->put (f->m_decl, brig_code.total_size); + } + else + { + /* Internal function. */ + hsa_internal_fn **slot + = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT); + hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); + int_fn->m_offset = brig_code.total_size; + *slot = int_fn; + } + + brig_code.add (&fndir, sizeof (fndir)); + /* terrible hack: we need to set instCount after we emit all + insns, but we need to emit directive in order, and we emit directives + during insn emitting. So we need to emit the FUNCTION directive + early, then the insns, and then we need to set instCount, so remember + a pointer to it, in some horrible way. cur_chunk.data+size points + directly to after fndir here. */ + ptr_to_fndir + = (BrigDirectiveExecutable *)(brig_code.cur_chunk->data + + brig_code.cur_chunk->size + - sizeof (fndir)); + + if (f->m_output_arg) + emit_directive_variable (f->m_output_arg); + for (unsigned i = 0; i < f->m_input_args.length (); i++) + emit_directive_variable (f->m_input_args[i]); + + if (!f->m_declaration_p) + { + for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++) + { + emit_directive_variable (sym); + brig_insn_count++; + } + for (unsigned i = 0; i < f->m_private_variables.length (); i++) + { + emit_directive_variable (f->m_private_variables[i]); + brig_insn_count++; + } + } + + return ptr_to_fndir; +} + +/* Emit a label directive for the given HBB. We assume it is about to start on + the current offset in the code section. */ + +static void +emit_bb_label_directive (hsa_bb *hbb) +{ + struct BrigDirectiveLabel lbldir; + + lbldir.base.byteCount = lendian16 (sizeof (lbldir)); + lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); + char buf[32]; + snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), + hbb->m_index); + lbldir.name = lendian32 (brig_emit_string (buf, '@')); + + hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, + sizeof (lbldir)); + brig_insn_count++; +} + +/* Map a normal HSAIL type to the type of the equivalent BRIG operand + holding such, for constants and registers. 
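HSA registers are bit-typed, so for example both an f32 and an s32 value + live in a 32 bit 'b32' register.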
*/ + +static BrigType16_t +regtype_for_type (BrigType16_t t) +{ + switch (t) + { + case BRIG_TYPE_B1: + return BRIG_TYPE_B1; + + case BRIG_TYPE_U8: + case BRIG_TYPE_U16: + case BRIG_TYPE_U32: + case BRIG_TYPE_S8: + case BRIG_TYPE_S16: + case BRIG_TYPE_S32: + case BRIG_TYPE_B8: + case BRIG_TYPE_B16: + case BRIG_TYPE_B32: + case BRIG_TYPE_F16: + case BRIG_TYPE_F32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_U16X2: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_F16X2: + return BRIG_TYPE_B32; + + case BRIG_TYPE_U64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F64: + case BRIG_TYPE_B64: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + return BRIG_TYPE_B64; + + case BRIG_TYPE_B128: + case BRIG_TYPE_U8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + return BRIG_TYPE_B128; + + default: + gcc_unreachable (); + } +} + +/* Return the length of the BRIG type TYPE that is going to be streamed out as + an immediate constant (so it must not be B1). */ + +unsigned +hsa_get_imm_brig_type_len (BrigType16_t type) +{ + BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK; + BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK; + + switch (pack_type) + { + case BRIG_TYPE_PACK_NONE: + break; + case BRIG_TYPE_PACK_32: + return 4; + case BRIG_TYPE_PACK_64: + return 8; + case BRIG_TYPE_PACK_128: + return 16; + default: + gcc_unreachable (); + } + + switch (base_type) + { + case BRIG_TYPE_U8: + case BRIG_TYPE_S8: + case BRIG_TYPE_B8: + return 1; + case BRIG_TYPE_U16: + case BRIG_TYPE_S16: + case BRIG_TYPE_F16: + case BRIG_TYPE_B16: + return 2; + case BRIG_TYPE_U32: + case BRIG_TYPE_S32: + case BRIG_TYPE_F32: + case BRIG_TYPE_B32: + return 4; + case BRIG_TYPE_U64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F64: + case BRIG_TYPE_B64: + return 8; + case BRIG_TYPE_B128: + return 16; + default: + gcc_unreachable (); + } +} + +/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission. + If NEED_LEN is not equal to zero, shrink or extend the value + to NEED_LEN bytes. Return how many bytes were written. */ + +static int +emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) +{ + union hsa_bytes bytes; + + memset (&bytes, 0, sizeof (bytes)); + tree type = TREE_TYPE (value); + gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); + + unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; + if (INTEGRAL_TYPE_P (type) + || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) + switch (data_len) + { + case 1: + bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); + break; + case 2: + bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); + break; + case 4: + bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); + break; + case 8: + bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); + break; + default: + gcc_unreachable (); + } + else if (SCALAR_FLOAT_TYPE_P (type)) + { + if (data_len == 2) + { + sorry ("Support for HSA does not implement immediate 16 bit FPU " + "operands"); + return 2; + } + unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type)); + /* There are always 32 bits in each long, no matter the size of + the hosts long. 
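real_to_target therefore hands back a double in two array elements, least + significant 32 bits first, which is how the code below reassembles it.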
*/ + long tmp[6]; + + real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); + + if (int_len == 4) + bytes.b32 = (uint32_t) tmp[0]; + else + { + bytes.b64 = (uint64_t)(uint32_t) tmp[1]; + bytes.b64 <<= 32; + bytes.b64 |= (uint32_t) tmp[0]; + } + } + else + gcc_unreachable (); + + int len; + if (need_len == 0) + len = data_len; + else + len = need_len; + + memcpy (data, &bytes, len); + return len; +} + +void +hsa_op_immed::emit_to_buffer (tree value) +{ + unsigned total_len = m_brig_repr_size; + + /* As we can have a constructor with fewer elements, fill the memory + with zeros. */ + m_brig_repr = XCNEWVEC (char, total_len); + char *p = m_brig_repr; + + if (TREE_CODE (value) == VECTOR_CST) + { + int i, num = VECTOR_CST_NELTS (value); + for (i = 0; i < num; i++) + { + unsigned actual; + actual + = emit_immediate_scalar_to_buffer (VECTOR_CST_ELT (value, i), p, 0); + total_len -= actual; + p += actual; + } + /* Vectors should have the exact size. */ + gcc_assert (total_len == 0); + } + else if (TREE_CODE (value) == STRING_CST) + memcpy (m_brig_repr, TREE_STRING_POINTER (value), + TREE_STRING_LENGTH (value)); + else if (TREE_CODE (value) == COMPLEX_CST) + { + gcc_assert (total_len % 2 == 0); + unsigned actual; + actual + = emit_immediate_scalar_to_buffer (TREE_REALPART (value), p, + total_len / 2); + + gcc_assert (actual == total_len / 2); + p += actual; + + actual + = emit_immediate_scalar_to_buffer (TREE_IMAGPART (value), p, + total_len / 2); + gcc_assert (actual == total_len / 2); + } + else if (TREE_CODE (value) == CONSTRUCTOR) + { + unsigned len = vec_safe_length (CONSTRUCTOR_ELTS (value)); + for (unsigned i = 0; i < len; i++) + { + tree v = CONSTRUCTOR_ELT (value, i)->value; + unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); + total_len -= actual; + p += actual; + } + } + else + emit_immediate_scalar_to_buffer (value, p, total_len); +} + +/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might + have been massaged to comply with various HSA/BRIG type requirements, so the + only important aspect of that is the length (because HSAIL might expect + smaller constants or become bit-data). The data should be represented + according to what is in the tree representation. */ + +static void +emit_immediate_operand (hsa_op_immed *imm) +{ + struct BrigOperandConstantBytes out; + + memset (&out, 0, sizeof (out)); + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); + uint32_t byteCount = lendian32 (imm->m_brig_repr_size); + out.type = lendian16 (imm->m_type); + out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_operand.add (&out, sizeof (out)); + brig_data.add (imm->m_brig_repr, imm->m_brig_repr_size); + brig_data.round_size_up (4); +} + +/* Emit a register BRIG operand REG. 
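The register kind (control, single, double or quad) is derived from the bit + width of the register type.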
*/ + +static void +emit_register_operand (hsa_op_reg *reg) +{ + struct BrigOperandRegister out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); + out.regNum = lendian32 (reg->m_hard_num); + + switch (regtype_for_type (reg->m_type)) + { + case BRIG_TYPE_B32: + out.regKind = BRIG_REGISTER_KIND_SINGLE; + break; + case BRIG_TYPE_B64: + out.regKind = BRIG_REGISTER_KIND_DOUBLE; + break; + case BRIG_TYPE_B128: + out.regKind = BRIG_REGISTER_KIND_QUAD; + break; + case BRIG_TYPE_B1: + out.regKind = BRIG_REGISTER_KIND_CONTROL; + break; + default: + gcc_unreachable (); + } + + brig_operand.add (&out, sizeof (out)); +} + +/* Emit an address BRIG operand ADDR. */ + +static void +emit_address_operand (hsa_op_address *addr) +{ + struct BrigOperandAddress out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); + out.symbol = addr->m_symbol + ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; + out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0; + + if (sizeof (addr->m_imm_offset) == 8) + { + out.offset.lo = lendian32 (addr->m_imm_offset); + out.offset.hi = lendian32 (addr->m_imm_offset >> 32); + } + else + { + gcc_assert (sizeof (addr->m_imm_offset) == 4); + out.offset.lo = lendian32 (addr->m_imm_offset); + out.offset.hi = 0; + } + + brig_operand.add (&out, sizeof (out)); +} + +/* Emit a code reference operand REF. */ + +static void +emit_code_ref_operand (hsa_op_code_ref *ref) +{ + struct BrigOperandCodeRef out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); + out.ref = lendian32 (ref->m_directive_offset); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit a code list operand CODE_LIST. */ + +static void +emit_code_list_operand (hsa_op_code_list *code_list) +{ + struct BrigOperandCodeList out; + unsigned args = code_list->m_offsets.length (); + + for (unsigned i = 0; i < args; i++) + gcc_assert (code_list->m_offsets[i]); + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); + + uint32_t byteCount = lendian32 (4 * args); + + out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); + brig_data.round_size_up (4); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit an operand list operand OPERAND_LIST. */ + +static void +emit_operand_list_operand (hsa_op_operand_list *operand_list) +{ + struct BrigOperandOperandList out; + unsigned args = operand_list->m_offsets.length (); + + for (unsigned i = 0; i < args; i++) + gcc_assert (operand_list->m_offsets[i]); + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); + + uint32_t byteCount = lendian32 (4 * args); + + out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); + brig_data.round_size_up (4); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit all operands queued for writing. 
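Operands must be emitted in exactly the order in which enqueue_op assigned + their offsets; the assertion below verifies that the projected offset of + each operand matches its real position.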
*/ + +static void +emit_queued_operands (void) +{ + for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) + { + gcc_assert (op->m_brig_op_offset == brig_operand.total_size); + if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) + emit_immediate_operand (imm); + else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) + emit_register_operand (reg); + else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) + emit_address_operand (addr); + else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) + emit_code_ref_operand (ref); + else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) + emit_code_list_operand (code_list); + else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) + emit_operand_list_operand (l); + else + gcc_unreachable (); + } +} + +/* Emit directives describing the function that is used for + a function declaration. */ + +static BrigDirectiveExecutable * +emit_function_declaration (tree decl) +{ + hsa_function_representation *f = hsa_generate_function_declaration (decl); + + BrigDirectiveExecutable *e = emit_function_directives (f, true); + emit_queued_operands (); + + delete f; + + return e; +} + +/* Emit directives describing the function that is used for + an internal function declaration. */ + +static BrigDirectiveExecutable * +emit_internal_fn_decl (hsa_internal_fn *fn) +{ + hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); + + BrigDirectiveExecutable *e = emit_function_directives (f, true); + emit_queued_operands (); + + delete f; + + return e; +} + +/* Enqueue all operands of INSN and return offset to BRIG data section + to list of operand offsets. */ + +static unsigned +emit_insn_operands (hsa_insn_basic *insn) +{ + auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> + operand_offsets; + + unsigned l = insn->operand_count (); + operand_offsets.safe_grow (l); + + for (unsigned i = 0; i < l; i++) + operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); + + /* We have N operands so use 4 * N for the byte_count. */ + uint32_t byte_count = lendian32 (4 * l); + + unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); + brig_data.add (operand_offsets.address (), + l * sizeof (BrigOperandOffset32_t)); + + brig_data.round_size_up (4); + + return offset; +} + +/* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset + to BRIG data section to list of operand offsets. */ + +static unsigned +emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, + hsa_op_base *op2 = NULL) +{ + auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> + operand_offsets; + + gcc_checking_assert (op0 != NULL); + operand_offsets.safe_push (enqueue_op (op0)); + + if (op1 != NULL) + { + operand_offsets.safe_push (enqueue_op (op1)); + if (op2 != NULL) + operand_offsets.safe_push (enqueue_op (op2)); + } + + unsigned l = operand_offsets.length (); + + /* We have N operands so use 4 * N for the byte_count. */ + uint32_t byte_count = lendian32 (4 * l); + + unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); + brig_data.add (operand_offsets.address (), + l * sizeof (BrigOperandOffset32_t)); + + brig_data.round_size_up (4); + + return offset; +} + +/* Emit an HSA memory instruction and all necessary directives, schedule + necessary operands for writing. 
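The memory segment is taken from the symbol of the address operand; when + the address consists only of a register and an offset, flat addressing is + used.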
*/ + +static void +emit_memory_insn (hsa_insn_mem *mem) +{ + struct BrigInstMem repr; + gcc_checking_assert (mem->operand_count () == 2); + + hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + repr.modifier = 0; + repr.equivClass = mem->m_equiv_class; + repr.align = mem->m_align; + if (mem->m_opcode == BRIG_OPCODE_LD) + repr.width = BRIG_WIDTH_1; + else + repr.width = BRIG_WIDTH_NONE; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA signal memory instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_signal_insn (hsa_insn_signal *mem) +{ + struct BrigInstSignal repr; + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + repr.memoryOrder = mem->m_memoryorder; + repr.signalOperation = mem->m_atomicop; + repr.signalType = BRIG_TYPE_SIG64; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA atomic memory instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_atomic_insn (hsa_insn_atomic *mem) +{ + struct BrigInstAtomic repr; + + /* Either operand[0] or operand[1] must be an address operand. */ + hsa_op_address *addr = NULL; + if (is_a <hsa_op_address *> (mem->get_op (0))) + addr = as_a <hsa_op_address *> (mem->get_op (0)); + else + addr = as_a <hsa_op_address *> (mem->get_op (1)); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + repr.memoryOrder = mem->m_memoryorder; + repr.memoryScope = mem->m_memoryscope; + repr.atomicOperation = mem->m_atomicop; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA LDA instruction and all necessary directives, schedule + necessary operands for writing. 
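LDA merely computes the address of a variable into a register, so it is + represented by a BrigInstAddr record rather than a BrigInstMem one.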
*/ + +static void +emit_addr_insn (hsa_insn_basic *insn) +{ + struct BrigInstAddr repr; + + hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA segment conversion instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_segment_insn (hsa_insn_seg *seg) +{ + struct BrigInstSegCvt repr; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); + repr.base.opcode = lendian16 (seg->m_opcode); + repr.base.type = lendian16 (seg->m_type); + repr.base.operands = lendian32 (emit_insn_operands (seg)); + repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); + repr.segment = seg->m_segment; + repr.modifier = 0; + + brig_code.add (&repr, sizeof (repr)); + + brig_insn_count++; +} + +/* Emit an HSA alloca instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_alloca_insn (hsa_insn_alloca *alloca) +{ + struct BrigInstMem repr; + gcc_checking_assert (alloca->operand_count () == 2); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); + repr.base.opcode = lendian16 (alloca->m_opcode); + repr.base.type = lendian16 (alloca->m_type); + repr.base.operands = lendian32 (emit_insn_operands (alloca)); + repr.segment = BRIG_SEGMENT_PRIVATE; + repr.modifier = 0; + repr.equivClass = 0; + repr.align = alloca->m_align; + repr.width = BRIG_WIDTH_NONE; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA comparison instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_cmp_insn (hsa_insn_cmp *cmp) +{ + struct BrigInstCmp repr; + + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); + repr.base.opcode = lendian16 (cmp->m_opcode); + repr.base.type = lendian16 (cmp->m_type); + repr.base.operands = lendian32 (emit_insn_operands (cmp)); + + if (is_a <hsa_op_reg *> (cmp->get_op (1))) + repr.sourceType + = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); + else + repr.sourceType + = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); + repr.modifier = 0; + repr.compare = cmp->m_compare; + repr.pack = 0; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA branching instruction and all necessary directives, schedule + necessary operands for writing. 
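Only direct conditional branches (CBR) are expected here; the branch target + is the destination of the EDGE_TRUE_VALUE edge, while the fall-through edge + is handled separately by perhaps_emit_branch.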
*/ + +static void +emit_branch_insn (hsa_insn_br *br) +{ + struct BrigInstBr repr; + + basic_block target = NULL; + edge_iterator ei; + edge e; + + /* At the moment we only handle direct conditional jumps. */ + gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (br->m_opcode); + repr.width = BRIG_WIDTH_1; + /* For conditional jumps the type is always B1. */ + repr.base.type = lendian16 (BRIG_TYPE_B1); + + FOR_EACH_EDGE (e, ei, br->m_bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + target = e->dest; + break; + } + gcc_assert (target); + + repr.base.operands + = lendian32 (emit_operands (br->get_op (0), + &hsa_bb_for_bb (target)->m_label_ref)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA unconditional jump branching instruction that points to + a label REFERENCE. */ + +static void +emit_unconditional_jump (hsa_op_code_ref *reference) +{ + struct BrigInstBr repr; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (BRIG_OPCODE_BR); + repr.base.type = lendian16 (BRIG_TYPE_NONE); + /* Direct branches to labels must be width(all). */ + repr.width = BRIG_WIDTH_ALL; + + repr.base.operands = lendian32 (emit_operands (reference)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA switch jump instruction that uses a jump table to + jump to a destination label. */ + +static void +emit_switch_insn (hsa_insn_sbr *sbr) +{ + struct BrigInstBr repr; + + gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (sbr->m_opcode); + repr.width = BRIG_WIDTH_1; + /* The instruction is typed by the type of the index register. */ + hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); + repr.base.type = lendian16 (index->m_type); + repr.base.operands + = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; + + /* Emit jump to default label. */ + hsa_bb *hbb = hsa_bb_for_bb (sbr->m_default_bb); + emit_unconditional_jump (&hbb->m_label_ref); +} + +/* Emit an HSA convert instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_cvt_insn (hsa_insn_cvt *insn) +{ + struct BrigInstCvt repr; + BrigType16_t srctype; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if (is_a <hsa_op_reg *> (insn->get_op (1))) + srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; + else + srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; + repr.sourceType = lendian16 (srctype); + repr.modifier = 0; + /* Conversions to float, from either a wider float or from an integer, + require a rounding setting (we default to 'near'). 
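Conversions from float to + integer round toward zero (BRIG_ROUND_INTEGER_ZERO) and all remaining + conversions need no rounding at all.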
*/ + if (hsa_type_float_p (insn->m_type) + && (!hsa_type_float_p (srctype) + || ((insn->m_type & BRIG_TYPE_BASE_MASK) + < (srctype & BRIG_TYPE_BASE_MASK)))) + repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; + else if (hsa_type_integer_p (insn->m_type) && + hsa_type_float_p (srctype)) + repr.round = BRIG_ROUND_INTEGER_ZERO; + else + repr.round = BRIG_ROUND_NONE; + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit call instruction INSN, where this instruction must be closed + within a call block instruction. */ + +static void +emit_call_insn (hsa_insn_call *call) +{ + struct BrigInstBr repr; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (BRIG_OPCODE_CALL); + repr.base.type = lendian16 (BRIG_TYPE_NONE); + + repr.base.operands + = lendian32 (emit_operands (call->m_result_code_list, &call->m_func, + call->m_args_code_list)); + + /* Internal functions have not set m_called_function. */ + if (call->m_called_function) + { + function_linkage_pair pair (call->m_called_function, + call->m_func.m_brig_op_offset); + function_call_linkage.safe_push (pair); + } + else + { + hsa_internal_fn *slot + = hsa_emitted_internal_decls->find (call->m_called_internal_fn); + gcc_assert (slot); + gcc_assert (slot->m_offset > 0); + call->m_func.m_directive_offset = slot->m_offset; + } + + repr.width = BRIG_WIDTH_ALL; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit argument block directive. */ + +static void +emit_arg_block_insn (hsa_insn_arg_block *insn) +{ + switch (insn->m_kind) + { + case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: + { + struct BrigDirectiveArgBlock repr; + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_kind); + brig_code.add (&repr, sizeof (repr)); + + for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) + { + insn->m_call_insn->m_args_code_list->m_offsets[i] + = lendian32 (emit_directive_variable + (insn->m_call_insn->m_input_args[i])); + brig_insn_count++; + } + + if (insn->m_call_insn->m_output_arg) + { + insn->m_call_insn->m_result_code_list->m_offsets[0] + = lendian32 (emit_directive_variable + (insn->m_call_insn->m_output_arg)); + brig_insn_count++; + } + + break; + } + case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: + { + struct BrigDirectiveArgBlock repr; + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_kind); + brig_code.add (&repr, sizeof (repr)); + break; + } + default: + gcc_unreachable (); + } + + brig_insn_count++; +} + +/* Emit comment directive. */ + +static void +emit_comment_insn (hsa_insn_comment *insn) +{ + struct BrigDirectiveComment repr; + memset (&repr, 0, sizeof (repr)); + + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_opcode); + repr.name = brig_emit_string (insn->m_comment, '\0', false); + brig_code.add (&repr, sizeof (repr)); +} + +/* Emit queue instruction INSN. 
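Queue manipulation instructions always operate on the global segment and + use release memory ordering.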
*/ + +static void +emit_queue_insn (hsa_insn_queue *insn) +{ + BrigInstQueue repr; + memset (&repr, 0, sizeof (repr)); + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.segment = BRIG_SEGMENT_GLOBAL; + repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE; + repr.base.operands = lendian32 (emit_insn_operands (insn)); + brig_data.round_size_up (4); + brig_code.add (&repr, sizeof (repr)); + + brig_insn_count++; +} + +/* Emit source type instruction INSN. */ + +static void +emit_srctype_insn (hsa_insn_srctype *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. */ + struct BrigInstSourceType repr; + unsigned operand_count = insn->operand_count (); + gcc_checking_assert (operand_count >= 2); + + memset (&repr, 0, sizeof (repr)); + repr.sourceType = lendian16 (insn->m_source_type); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + + repr.base.operands = lendian32 (emit_insn_operands (insn)); + brig_code.add (&repr, sizeof (struct BrigInstSourceType)); + brig_insn_count++; +} + +/* Emit packed instruction INSN. */ + +static void +emit_packed_insn (hsa_insn_packed *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. */ + struct BrigInstSourceType repr; + unsigned operand_count = insn->operand_count (); + gcc_checking_assert (operand_count >= 2); + + memset (&repr, 0, sizeof (repr)); + repr.sourceType = lendian16 (insn->m_source_type); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + + if (insn->m_opcode == BRIG_OPCODE_COMBINE) + { + /* Create operand list for packed type. */ + for (unsigned i = 1; i < operand_count; i++) + { + gcc_checking_assert (insn->get_op (i)); + insn->m_operand_list->m_offsets[i - 1] + = lendian32 (enqueue_op (insn->get_op (i))); + } + + repr.base.operands = lendian32 (emit_operands (insn->get_op (0), + insn->m_operand_list)); + } + else if (insn->m_opcode == BRIG_OPCODE_EXPAND) + { + /* Create operand list for packed type. */ + for (unsigned i = 0; i < operand_count - 1; i++) + { + gcc_checking_assert (insn->get_op (i)); + insn->m_operand_list->m_offsets[i] + = lendian32 (enqueue_op (insn->get_op (i))); + } + + unsigned ops = emit_operands (insn->m_operand_list, + insn->get_op (insn->operand_count () - 1)); + repr.base.operands = lendian32 (ops); + } + + + brig_code.add (&repr, sizeof (struct BrigInstSourceType)); + brig_insn_count++; +} + +/* Emit a basic HSA instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_basic_insn (hsa_insn_basic *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. 
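A BrigInstMod is therefore filled in unconditionally, but only packed + (vector) instructions, which need its rounding and packing control fields, + are written out in full; all other instructions emit just the + BrigInstBasic prefix.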
*/ + struct BrigInstMod repr; + BrigType16_t type; + + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); + repr.base.opcode = lendian16 (insn->m_opcode); + switch (insn->m_opcode) + { + /* And the bit-logical operations need bit types and whine about + arithmetic types :-/ */ + case BRIG_OPCODE_AND: + case BRIG_OPCODE_OR: + case BRIG_OPCODE_XOR: + case BRIG_OPCODE_NOT: + type = regtype_for_type (insn->m_type); + break; + default: + type = insn->m_type; + break; + } + repr.base.type = lendian16 (type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if ((type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE) + { + if (hsa_type_float_p (type) + && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) + repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; + else + repr.round = 0; + /* We assume that destination and sources agree in packing layout. */ + if (insn->num_used_ops () >= 2) + repr.pack = BRIG_PACK_PP; + else + repr.pack = BRIG_PACK_P; + repr.reserved = 0; + repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); + brig_code.add (&repr, sizeof (struct BrigInstMod)); + } + else + brig_code.add (&repr, sizeof (struct BrigInstBasic)); + brig_insn_count++; +} + +/* Emit an HSA instruction and all necessary directives, schedule necessary + operands for writing. */ + +static void +emit_insn (hsa_insn_basic *insn) +{ + gcc_assert (!is_a <hsa_insn_phi *> (insn)); + + insn->m_brig_offset = brig_code.total_size; + + if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn)) + emit_signal_insn (signal); + else if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn)) + emit_atomic_insn (atom); + else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) + emit_memory_insn (mem); + else if (insn->m_opcode == BRIG_OPCODE_LDA) + emit_addr_insn (insn); + else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) + emit_segment_insn (seg); + else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) + emit_cmp_insn (cmp); + else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) + emit_branch_insn (br); + else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) + { + if (switch_instructions == NULL) + switch_instructions = new vec <hsa_insn_sbr *> (); + + switch_instructions->safe_push (sbr); + emit_switch_insn (sbr); + } + else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) + emit_arg_block_insn (block); + else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) + emit_call_insn (call); + else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) + emit_comment_insn (comment); + else if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn)) + emit_queue_insn (queue); + else if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn)) + emit_srctype_insn (srctype); + else if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn)) + emit_packed_insn (packed); + else if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn)) + emit_cvt_insn (cvt); + else if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn)) + emit_alloca_insn (alloca); + else + emit_basic_insn (insn); +} + +/* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL, + or we are about to finish emitting code, if it is NULL. If the fall through + edge from BB does not lead to NEXT_BB, emit an unconditional jump. 
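The fall-through successor is the destination of the edge that does not + have EDGE_TRUE_VALUE set; true edges have already been given an explicit + CBR by emit_branch_insn.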
*/ + +static void +perhaps_emit_branch (basic_block bb, basic_block next_bb) +{ + basic_block t_bb = NULL, ff = NULL; + + edge_iterator ei; + edge e; + + /* If the last instruction of BB is a switch, ignore emission of all + edges. */ + if (hsa_bb_for_bb (bb)->m_last_insn + && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) + return; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + gcc_assert (!t_bb); + t_bb = e->dest; + } + else + { + gcc_assert (!ff); + ff = e->dest; + } + + if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) + return; + + emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); +} + +/* Emit the current function to the various brig sections. */ + +void +hsa_brig_emit_function (void) +{ + basic_block bb, prev_bb; + hsa_insn_basic *insn; + BrigDirectiveExecutable *ptr_to_fndir; + + brig_init (); + + brig_insn_count = 0; + memset (&op_queue, 0, sizeof (op_queue)); + op_queue.projected_size = brig_operand.total_size; + + if (!function_offsets) + function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); + + if (!emitted_declarations) + emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); + + for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) + { + tree called = hsa_cfun->m_called_functions[i]; + + /* If the function has no definition, emit a declaration. */ + if (!emitted_declarations->get (called)) + { + BrigDirectiveExecutable *e = emit_function_declaration (called); + emitted_declarations->put (called, e); + } + } + + for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) + { + hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; + emit_internal_fn_decl (called); + } + + ptr_to_fndir = emit_function_directives (hsa_cfun, false); + for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; + insn; + insn = insn->m_next) + emit_insn (insn); + prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); + FOR_EACH_BB_FN (bb, cfun) + { + perhaps_emit_branch (prev_bb, bb); + emit_bb_label_directive (hsa_bb_for_bb (bb)); + for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) + emit_insn (insn); + prev_bb = bb; + } + perhaps_emit_branch (prev_bb, NULL); + ptr_to_fndir->nextModuleEntry = brig_code.total_size; + + /* Fill up label references for all sbr instructions. */ + if (switch_instructions) + { + for (unsigned i = 0; i < switch_instructions->length (); i++) + { + hsa_insn_sbr *sbr = (*switch_instructions)[i]; + for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) + { + hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); + sbr->m_label_code_list->m_offsets[j] + = hbb->m_label_ref.m_directive_offset; + } + } + + switch_instructions->release (); + delete switch_instructions; + switch_instructions = NULL; + } + + if (dump_file) + { + fprintf (dump_file, "------- After BRIG emission: -------\n"); + dump_hsa_cfun (dump_file); + } + + emit_queued_operands (); +} + +/* Emit all global symbols related to OMP. */ + +void +hsa_brig_emit_omp_symbols (void) +{ + brig_init (); + emit_directive_variable (hsa_num_threads); +} + +static GTY(()) tree hsa_cdtor_statements[2]; + +/* Create and return the __hsa_global_variables symbol that contains + all the information consumed by libgomp to link global variables + with their string names used by an HSA kernel. 
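Each element of the emitted array pairs the '&'-prefixed, sanitized BRIG + name of a variable with the host address of its declaration.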
*/ + +static tree +hsa_output_global_variables () +{ + unsigned l = hsa_global_variable_symbols->elements (); + + tree variable_info_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("name"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_data_size"), + ptr_type_node); + DECL_CHAIN (id_f2) = id_f1; + finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, + NULL_TREE); + + tree int_num_of_global_vars; + int_num_of_global_vars = build_int_cst (uint32_type_node, l); + tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); + tree global_vars_array_type = build_array_type (variable_info_type, + global_vars_num_index_type); + TYPE_ARTIFICIAL (global_vars_array_type) = 1; + + vec<constructor_elt, va_gc> *global_vars_vec = NULL; + + for (hash_table <hsa_noop_symbol_hasher>::iterator it + = hsa_global_variable_symbols->begin (); + it != hsa_global_variable_symbols->end (); ++it) + { + unsigned len = strlen ((*it)->m_name); + char *copy = XNEWVEC (char, len + 2); + copy[0] = '&'; + memcpy (copy + 1, (*it)->m_name, len); + copy[len + 1] = '\0'; + len++; + hsa_sanitize_name (copy); + + tree var_name = build_string (len, copy); + TREE_TYPE (var_name) + = build_array_type (char_type_node, build_index_type (size_int (len))); + free (copy); + + vec<constructor_elt, va_gc> *variable_info_vec = NULL; + CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (var_name)), + var_name)); + CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, + build_fold_addr_expr ((*it)->m_decl)); + + tree variable_info_ctor = build_constructor (variable_info_type, + variable_info_vec); + + CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, + variable_info_ctor); + } + + tree global_vars_ctor = build_constructor (global_vars_array_type, + global_vars_vec); + + char tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); + tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + global_vars_array_type); + TREE_STATIC (global_vars_table) = 1; + TREE_READONLY (global_vars_table) = 1; + TREE_PUBLIC (global_vars_table) = 0; + DECL_ARTIFICIAL (global_vars_table) = 1; + DECL_IGNORED_P (global_vars_table) = 1; + DECL_EXTERNAL (global_vars_table) = 0; + TREE_CONSTANT (global_vars_table) = 1; + DECL_INITIAL (global_vars_table) = global_vars_ctor; + varpool_node::finalize_decl (global_vars_table); + + return global_vars_table; +} + +/* Create __hsa_host_functions and __hsa_kernels that contain + all the information consumed by libgomp to register all kernels + in the BRIG binary. 
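The host function table and the kernel descriptor array are indexed + identically, which presumably is what allows libgomp to map the host + version of a function to the corresponding HSA kernel.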
*/ + +static void +hsa_output_kernels (tree *host_func_table, tree *kernels) +{ + unsigned map_count = hsa_get_number_decl_kernel_mappings (); + + tree int_num_of_kernels; + int_num_of_kernels = build_int_cst (uint32_type_node, map_count); + tree kernel_num_index_type = build_index_type (int_num_of_kernels); + tree host_functions_array_type = build_array_type (ptr_type_node, + kernel_num_index_type); + TYPE_ARTIFICIAL (host_functions_array_type) = 1; + + vec<constructor_elt, va_gc> *host_functions_vec = NULL; + for (unsigned i = 0; i < map_count; ++i) + { + tree decl = hsa_get_decl_kernel_mapping_decl (i); + tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl)); + CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn); + } + tree host_functions_ctor = build_constructor (host_functions_array_type, + host_functions_vec); + char tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1); + tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + host_functions_array_type); + TREE_STATIC (hsa_host_func_table) = 1; + TREE_READONLY (hsa_host_func_table) = 1; + TREE_PUBLIC (hsa_host_func_table) = 0; + DECL_ARTIFICIAL (hsa_host_func_table) = 1; + DECL_IGNORED_P (hsa_host_func_table) = 1; + DECL_EXTERNAL (hsa_host_func_table) = 0; + TREE_CONSTANT (hsa_host_func_table) = 1; + DECL_INITIAL (hsa_host_func_table) = host_functions_ctor; + varpool_node::finalize_decl (hsa_host_func_table); + *host_func_table = hsa_host_func_table; + + /* Following code emits list of kernel_info structures. */ + + tree kernel_info_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("name"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_data_size"), + unsigned_type_node); + DECL_CHAIN (id_f2) = id_f1; + tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("gridified_kernel_p"), + boolean_type_node); + DECL_CHAIN (id_f3) = id_f2; + tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies_count"), + unsigned_type_node); + DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies"), + build_pointer_type (build_pointer_type + (char_type_node))); + DECL_CHAIN (id_f5) = id_f4; + finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5, + NULL_TREE); + + int_num_of_kernels = build_int_cstu (uint32_type_node, map_count); + tree kernel_info_vector_type + = build_array_type (kernel_info_type, + build_index_type (int_num_of_kernels)); + TYPE_ARTIFICIAL (kernel_info_vector_type) = 1; + + vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL; + tree kernel_dependencies_vector_type = NULL; + + for (unsigned i = 0; i < map_count; ++i) + { + tree kernel = hsa_get_decl_kernel_mapping_decl (i); + char *name = hsa_get_decl_kernel_mapping_name (i); + unsigned len = strlen (name); + char *copy = XNEWVEC (char, len + 2); + copy[0] = '&'; + memcpy (copy + 1, name, len); + copy[len + 1] = '\0'; + len++; + + tree kern_name = build_string (len, copy); + TREE_TYPE (kern_name) + = build_array_type (char_type_node, build_index_type (size_int (len))); + free (copy); + + unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i); + tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size); + bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i); + tree 
gridified_kernel_p_tree = build_int_cstu (boolean_type_node, + gridified_kernel_p); + unsigned count = 0; + + kernel_dependencies_vector_type + = build_array_type (build_pointer_type (char_type_node), + build_index_type (size_int (0))); + + vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL; + if (hsa_decl_kernel_dependencies) + { + vec<const char *> **slot; + slot = hsa_decl_kernel_dependencies->get (kernel); + if (slot) + { + vec <const char *> *dependencies = *slot; + count = dependencies->length (); + + kernel_dependencies_vector_type + = build_array_type (build_pointer_type (char_type_node), + build_index_type (size_int (count))); + TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1; + + for (unsigned j = 0; j < count; j++) + { + const char *d = (*dependencies)[j]; + len = strlen (d); + tree dependency_name = build_string (len, d); + TREE_TYPE (dependency_name) + = build_array_type (char_type_node, + build_index_type (size_int (len))); + + CONSTRUCTOR_APPEND_ELT + (kernel_dependencies_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (dependency_name)), + dependency_name)); + } + } + } + + tree dependencies_count = build_int_cstu (unsigned_type_node, count); + + vec<constructor_elt, va_gc> *kernel_info_vec = NULL; + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE + (kern_name)), + kern_name)); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + gridified_kernel_p_tree); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count); + + if (count > 0) + { + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i); + tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + kernel_dependencies_vector_type); + + TREE_STATIC (dependencies_list) = 1; + TREE_READONLY (dependencies_list) = 1; + TREE_PUBLIC (dependencies_list) = 0; + DECL_ARTIFICIAL (dependencies_list) = 1; + DECL_IGNORED_P (dependencies_list) = 1; + DECL_EXTERNAL (dependencies_list) = 0; + TREE_CONSTANT (dependencies_list) = 1; + DECL_INITIAL (dependencies_list) + = build_constructor (kernel_dependencies_vector_type, + kernel_dependencies_vec); + varpool_node::finalize_decl (dependencies_list); + + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type + (TREE_TYPE (dependencies_list)), + dependencies_list)); + } + else + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node); + + tree kernel_info_ctor = build_constructor (kernel_info_type, + kernel_info_vec); + + CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE, + kernel_info_ctor); + } + + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1); + tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + kernel_info_vector_type); + + TREE_STATIC (hsa_kernels) = 1; + TREE_READONLY (hsa_kernels) = 1; + TREE_PUBLIC (hsa_kernels) = 0; + DECL_ARTIFICIAL (hsa_kernels) = 1; + DECL_IGNORED_P (hsa_kernels) = 1; + DECL_EXTERNAL (hsa_kernels) = 0; + TREE_CONSTANT (hsa_kernels) = 1; + DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type, + kernel_info_vector_vec); + varpool_node::finalize_decl (hsa_kernels); + *kernels = hsa_kernels; +} + +/* Create a static constructor that will register our brig stuff with + libgomp.
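+
+   Conceptually (an editorial sketch, assuming the GOMP_OFFLOAD_REGISTER
+   builtin resolves to libgomp's GOMP_offload_register_ver entry point;
+   the function name below is hypothetical), the constructor emitted here
+   behaves like:
+
+     static void
+     register_hsa_image (void)
+     {
+       GOMP_offload_register_ver (GOMP_VERSION_PACK (GOMP_VERSION,
+                                                     GOMP_VERSION_HSA),
+                                  __hsa_libgomp_host_table,
+                                  GOMP_DEVICE_HSA, &__hsa_img_descriptor);
+     }
+
+   with a matching destructor that calls the unregister builtin with the
+   same four arguments.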
*/ + +static void +hsa_output_libgomp_mapping (tree brig_decl) +{ + unsigned kernel_count = hsa_get_number_decl_kernel_mappings (); + unsigned global_variable_count = hsa_global_variable_symbols->elements (); + + tree kernels; + tree host_func_table; + + hsa_output_kernels (&host_func_table, &kernels); + tree global_vars = hsa_output_global_variables (); + + tree hsa_image_desc_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("brig_module"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_count"), + unsigned_type_node); + + DECL_CHAIN (id_f2) = id_f1; + tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("hsa_kernel_infos"), + ptr_type_node); + DECL_CHAIN (id_f3) = id_f2; + tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("global_variable_count"), + unsigned_type_node); + DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("hsa_global_variable_infos"), + ptr_type_node); + DECL_CHAIN (id_f5) = id_f4; + finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5, + NULL_TREE); + TYPE_ARTIFICIAL (hsa_image_desc_type) = 1; + + vec<constructor_elt, va_gc> *img_desc_vec = NULL; + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build_fold_addr_expr (brig_decl)); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build_int_cstu (unsigned_type_node, kernel_count)); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (kernels)), + kernels)); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build_int_cstu (unsigned_type_node, + global_variable_count)); + CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (global_vars)), + global_vars)); + + tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec); + + char tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1); + tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + hsa_image_desc_type); + TREE_STATIC (hsa_img_descriptor) = 1; + TREE_READONLY (hsa_img_descriptor) = 1; + TREE_PUBLIC (hsa_img_descriptor) = 0; + DECL_ARTIFICIAL (hsa_img_descriptor) = 1; + DECL_IGNORED_P (hsa_img_descriptor) = 1; + DECL_EXTERNAL (hsa_img_descriptor) = 0; + TREE_CONSTANT (hsa_img_descriptor) = 1; + DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor; + varpool_node::finalize_decl (hsa_img_descriptor); + + /* Construct the "host_table" libgomp expects. 
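+   It is an array of four pointers: begin and end of the host function
+   table, then begin and end of the host variable table. Illustratively
+   (an editorial sketch with hypothetical names; this code emits no host
+   variable table, hence the null entries):
+
+     void *host_table[4] = {
+       &__hsa_host_functions[0],             // functions begin
+       &__hsa_host_functions[kernel_count],  // functions end
+       NULL,                                 // variables begin
+       NULL                                  // variables end
+     };
+
+   The end pointer is computed below as the begin address plus
+   kernel_count * sizeof (void *).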
*/ + tree index_type = build_index_type (build_int_cst (integer_type_node, 4)); + tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type); + TYPE_ARTIFICIAL (libgomp_host_table_type) = 1; + vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL; + tree host_func_table_addr = build_fold_addr_expr (host_func_table); + CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, + host_func_table_addr); + offset_int func_table_size + = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count; + CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, + fold_build2 (POINTER_PLUS_EXPR, + TREE_TYPE (host_func_table_addr), + host_func_table_addr, + build_int_cst (size_type_node, + func_table_size.to_uhwi + ()))); + CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); + CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node); + tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type, + libgomp_host_table_vec); + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1); + tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + libgomp_host_table_type); + + TREE_STATIC (hsa_libgomp_host_table) = 1; + TREE_READONLY (hsa_libgomp_host_table) = 1; + TREE_PUBLIC (hsa_libgomp_host_table) = 0; + DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1; + DECL_IGNORED_P (hsa_libgomp_host_table) = 1; + DECL_EXTERNAL (hsa_libgomp_host_table) = 0; + TREE_CONSTANT (hsa_libgomp_host_table) = 1; + DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor; + varpool_node::finalize_decl (hsa_libgomp_host_table); + + /* Generate an initializer with a call to the registration routine. */ + + tree offload_register + = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER); + gcc_checking_assert (offload_register); + + append_to_statement_list + (build_call_expr (offload_register, 4, + build_int_cstu (unsigned_type_node, + GOMP_VERSION_PACK (GOMP_VERSION, + GOMP_VERSION_HSA)), + build_fold_addr_expr (hsa_libgomp_host_table), + build_int_cst (integer_type_node, GOMP_DEVICE_HSA), + build_fold_addr_expr (hsa_img_descriptor)), + &hsa_cdtor_statements[0]); + + cgraph_build_static_cdtor ('I', hsa_cdtor_statements[0], + DEFAULT_INIT_PRIORITY); + + tree offload_unregister + = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER); + gcc_checking_assert (offload_unregister); + + append_to_statement_list + (build_call_expr (offload_unregister, 4, + build_int_cstu (unsigned_type_node, + GOMP_VERSION_PACK (GOMP_VERSION, + GOMP_VERSION_HSA)), + build_fold_addr_expr (hsa_libgomp_host_table), + build_int_cst (integer_type_node, GOMP_DEVICE_HSA), + build_fold_addr_expr (hsa_img_descriptor)), + &hsa_cdtor_statements[1]); + cgraph_build_static_cdtor ('D', hsa_cdtor_statements[1], + DEFAULT_INIT_PRIORITY); +} + +/* Emit the brig module we have compiled to a section in the final assembly and + also create a compile unit static constructor that will register the brig + module with libgomp. 
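+
+   The blob emitted below has the following layout, summarizing the code
+   that follows (editorial note, not a normative description of BRIG):
+
+     BrigModuleHeader   "HSA BRIG" magic, BRIG version, total byteCount
+     uint64_t [3]       byte offsets of the data, code and operand sections
+     data section       brig_data, zero-padded to HSA_SECTION_ALIGNMENT
+     code section       brig_code, padded likewise
+     operand section    brig_operand, padded likewise
+
+   and the code asserts that the total module size is a multiple of 16.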
*/ + +void +hsa_output_brig (void) +{ + section *saved_section; + + if (!brig_initialized) + return; + + for (unsigned i = 0; i < function_call_linkage.length (); i++) + { + function_linkage_pair p = function_call_linkage[i]; + + BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); + gcc_assert (*func_offset); + BrigOperandCodeRef *code_ref + = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset)); + gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); + code_ref->ref = lendian32 (*func_offset); + } + + /* Iterate all function declarations and if we meet a function that should + have module linkage and we are unable to emit HSAIL for the function, + then change the linkage to program linkage. Doing so, we will emit + a valid BRIG image. */ + if (hsa_failed_functions != NULL && emitted_declarations != NULL) + for (hash_map <tree, BrigDirectiveExecutable *>::iterator it + = emitted_declarations->begin (); + it != emitted_declarations->end (); + ++it) + { + if (hsa_failed_functions->contains ((*it).first)) + (*it).second->linkage = BRIG_LINKAGE_PROGRAM; + } + + saved_section = in_section; + + switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); + char tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1); + ASM_OUTPUT_LABEL (asm_out_file, tmp_name); + tree brig_id = get_identifier (tmp_name); + tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id, + char_type_node); + SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id); + TREE_ADDRESSABLE (brig_decl) = 1; + TREE_READONLY (brig_decl) = 1; + DECL_ARTIFICIAL (brig_decl) = 1; + DECL_IGNORED_P (brig_decl) = 1; + TREE_STATIC (brig_decl) = 1; + TREE_PUBLIC (brig_decl) = 0; + TREE_USED (brig_decl) = 1; + DECL_INITIAL (brig_decl) = brig_decl; + TREE_ASM_WRITTEN (brig_decl) = 1; + + BrigModuleHeader module_header; + memcpy (&module_header.identification, "HSA BRIG", + sizeof (module_header.identification)); + module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR); + module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR); + uint64_t section_index[3]; + + int data_padding, code_padding, operand_padding; + data_padding = HSA_SECTION_ALIGNMENT + - brig_data.total_size % HSA_SECTION_ALIGNMENT; + code_padding = HSA_SECTION_ALIGNMENT + - brig_code.total_size % HSA_SECTION_ALIGNMENT; + operand_padding = HSA_SECTION_ALIGNMENT + - brig_operand.total_size % HSA_SECTION_ALIGNMENT; + + uint64_t module_size = sizeof (module_header) + + sizeof (section_index) + + brig_data.total_size + + data_padding + + brig_code.total_size + + code_padding + + brig_operand.total_size + + operand_padding; + gcc_assert ((module_size % 16) == 0); + module_header.byteCount = lendian64 (module_size); + memset (&module_header.hash, 0, sizeof (module_header.hash)); + module_header.reserved = 0; + module_header.sectionCount = lendian32 (3); + module_header.sectionIndex = lendian64 (sizeof (module_header)); + assemble_string ((const char *) &module_header, sizeof (module_header)); + uint64_t off = sizeof (module_header) + sizeof (section_index); + section_index[0] = lendian64 (off); + off += brig_data.total_size + data_padding; + section_index[1] = lendian64 (off); + off += brig_code.total_size + code_padding; + section_index[2] = lendian64 (off); + assemble_string ((const char *) &section_index, sizeof (section_index)); + + char padding[HSA_SECTION_ALIGNMENT]; + memset (padding, 0, sizeof (padding)); + + brig_data.output (); + assemble_string (padding, data_padding); +
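+  /* Editorial note, not part of the original patch: the padding computed
+     above is HSA_SECTION_ALIGNMENT - size % HSA_SECTION_ALIGNMENT, so an
+     already aligned section still receives one full block of padding.
+     E.g. with alignment 16 and a section size of 32 bytes the padding is
+     16 - 32 % 16 = 16, not 0.  The offsets stored in section_index are
+     computed with the same expression, so the image stays self-consistent
+     either way.  */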
brig_code.output (); + assemble_string (padding, code_padding); + brig_operand.output (); + assemble_string (padding, operand_padding); + + if (saved_section) + switch_to_section (saved_section); + + hsa_output_libgomp_mapping (brig_decl); + + hsa_free_decl_kernel_mapping (); + brig_release_data (); + hsa_deinit_compilation_unit_data (); + + delete emitted_declarations; + emitted_declarations = NULL; + delete function_offsets; + function_offsets = NULL; +} diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c new file mode 100644 index 0000000..c5f1f69 --- /dev/null +++ b/gcc/hsa-dump.c @@ -0,0 +1,1189 @@ +/* Infrastructure to dump our HSAIL IL + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "is-a.h" +#include "vec.h" +#include "tree.h" +#include "cfg.h" +#include "function.h" +#include "dumpfile.h" +#include "gimple-pretty-print.h" +#include "cgraph.h" +#include "print-tree.h" +#include "symbol-summary.h" +#include "hsa.h" + +/* Return textual name of TYPE. 
*/ + +static const char * +hsa_type_name (BrigType16_t type) +{ + switch (type) + { + case BRIG_TYPE_NONE: + return "none"; + case BRIG_TYPE_U8: + return "u8"; + case BRIG_TYPE_U16: + return "u16"; + case BRIG_TYPE_U32: + return "u32"; + case BRIG_TYPE_U64: + return "u64"; + case BRIG_TYPE_S8: + return "s8"; + case BRIG_TYPE_S16: + return "s16"; + case BRIG_TYPE_S32: + return "s32"; + case BRIG_TYPE_S64: + return "s64"; + case BRIG_TYPE_F16: + return "f16"; + case BRIG_TYPE_F32: + return "f32"; + case BRIG_TYPE_F64: + return "f64"; + case BRIG_TYPE_B1: + return "b1"; + case BRIG_TYPE_B8: + return "b8"; + case BRIG_TYPE_B16: + return "b16"; + case BRIG_TYPE_B32: + return "b32"; + case BRIG_TYPE_B64: + return "b64"; + case BRIG_TYPE_B128: + return "b128"; + case BRIG_TYPE_SAMP: + return "samp"; + case BRIG_TYPE_ROIMG: + return "roimg"; + case BRIG_TYPE_WOIMG: + return "woimg"; + case BRIG_TYPE_RWIMG: + return "rwimg"; + case BRIG_TYPE_SIG32: + return "sig32"; + case BRIG_TYPE_SIG64: + return "sig64"; + case BRIG_TYPE_U8X4: + return "u8x4"; + case BRIG_TYPE_U8X8: + return "u8x8"; + case BRIG_TYPE_U8X16: + return "u8x16"; + case BRIG_TYPE_U16X2: + return "u16x2"; + case BRIG_TYPE_U16X4: + return "u16x4"; + case BRIG_TYPE_U16X8: + return "u16x8"; + case BRIG_TYPE_U32X2: + return "u32x2"; + case BRIG_TYPE_U32X4: + return "u32x4"; + case BRIG_TYPE_U64X2: + return "u64x2"; + case BRIG_TYPE_S8X4: + return "s8x4"; + case BRIG_TYPE_S8X8: + return "s8x8"; + case BRIG_TYPE_S8X16: + return "s8x16"; + case BRIG_TYPE_S16X2: + return "s16x2"; + case BRIG_TYPE_S16X4: + return "s16x4"; + case BRIG_TYPE_S16X8: + return "s16x8"; + case BRIG_TYPE_S32X2: + return "s32x2"; + case BRIG_TYPE_S32X4: + return "s32x4"; + case BRIG_TYPE_S64X2: + return "s64x2"; + case BRIG_TYPE_F16X2: + return "f16x2"; + case BRIG_TYPE_F16X4: + return "f16x4"; + case BRIG_TYPE_F16X8: + return "f16x8"; + case BRIG_TYPE_F32X2: + return "f32x2"; + case BRIG_TYPE_F32X4: + return "f32x4"; + case BRIG_TYPE_F64X2: + return "f64x2"; + default: + return "UNKNOWN_TYPE"; + } +} + +/* Return textual name of OPCODE. 
*/ + +static const char * +hsa_opcode_name (BrigOpcode16_t opcode) +{ + switch (opcode) + { + case BRIG_OPCODE_NOP: + return "nop"; + case BRIG_OPCODE_ABS: + return "abs"; + case BRIG_OPCODE_ADD: + return "add"; + case BRIG_OPCODE_BORROW: + return "borrow"; + case BRIG_OPCODE_CARRY: + return "carry"; + case BRIG_OPCODE_CEIL: + return "ceil"; + case BRIG_OPCODE_COPYSIGN: + return "copysign"; + case BRIG_OPCODE_DIV: + return "div"; + case BRIG_OPCODE_FLOOR: + return "floor"; + case BRIG_OPCODE_FMA: + return "fma"; + case BRIG_OPCODE_FRACT: + return "fract"; + case BRIG_OPCODE_MAD: + return "mad"; + case BRIG_OPCODE_MAX: + return "max"; + case BRIG_OPCODE_MIN: + return "min"; + case BRIG_OPCODE_MUL: + return "mul"; + case BRIG_OPCODE_MULHI: + return "mulhi"; + case BRIG_OPCODE_NEG: + return "neg"; + case BRIG_OPCODE_REM: + return "rem"; + case BRIG_OPCODE_RINT: + return "rint"; + case BRIG_OPCODE_SQRT: + return "sqrt"; + case BRIG_OPCODE_SUB: + return "sub"; + case BRIG_OPCODE_TRUNC: + return "trunc"; + case BRIG_OPCODE_MAD24: + return "mad24"; + case BRIG_OPCODE_MAD24HI: + return "mad24hi"; + case BRIG_OPCODE_MUL24: + return "mul24"; + case BRIG_OPCODE_MUL24HI: + return "mul24hi"; + case BRIG_OPCODE_SHL: + return "shl"; + case BRIG_OPCODE_SHR: + return "shr"; + case BRIG_OPCODE_AND: + return "and"; + case BRIG_OPCODE_NOT: + return "not"; + case BRIG_OPCODE_OR: + return "or"; + case BRIG_OPCODE_POPCOUNT: + return "popcount"; + case BRIG_OPCODE_XOR: + return "xor"; + case BRIG_OPCODE_BITEXTRACT: + return "bitextract"; + case BRIG_OPCODE_BITINSERT: + return "bitinsert"; + case BRIG_OPCODE_BITMASK: + return "bitmask"; + case BRIG_OPCODE_BITREV: + return "bitrev"; + case BRIG_OPCODE_BITSELECT: + return "bitselect"; + case BRIG_OPCODE_FIRSTBIT: + return "firstbit"; + case BRIG_OPCODE_LASTBIT: + return "lastbit"; + case BRIG_OPCODE_COMBINE: + return "combine"; + case BRIG_OPCODE_EXPAND: + return "expand"; + case BRIG_OPCODE_LDA: + return "lda"; + case BRIG_OPCODE_MOV: + return "mov"; + case BRIG_OPCODE_SHUFFLE: + return "shuffle"; + case BRIG_OPCODE_UNPACKHI: + return "unpackhi"; + case BRIG_OPCODE_UNPACKLO: + return "unpacklo"; + case BRIG_OPCODE_PACK: + return "pack"; + case BRIG_OPCODE_UNPACK: + return "unpack"; + case BRIG_OPCODE_CMOV: + return "cmov"; + case BRIG_OPCODE_CLASS: + return "class"; + case BRIG_OPCODE_NCOS: + return "ncos"; + case BRIG_OPCODE_NEXP2: + return "nexp2"; + case BRIG_OPCODE_NFMA: + return "nfma"; + case BRIG_OPCODE_NLOG2: + return "nlog2"; + case BRIG_OPCODE_NRCP: + return "nrcp"; + case BRIG_OPCODE_NRSQRT: + return "nrsqrt"; + case BRIG_OPCODE_NSIN: + return "nsin"; + case BRIG_OPCODE_NSQRT: + return "nsqrt"; + case BRIG_OPCODE_BITALIGN: + return "bitalign"; + case BRIG_OPCODE_BYTEALIGN: + return "bytealign"; + case BRIG_OPCODE_PACKCVT: + return "packcvt"; + case BRIG_OPCODE_UNPACKCVT: + return "unpackcvt"; + case BRIG_OPCODE_LERP: + return "lerp"; + case BRIG_OPCODE_SAD: + return "sad"; + case BRIG_OPCODE_SADHI: + return "sadhi"; + case BRIG_OPCODE_SEGMENTP: + return "segmentp"; + case BRIG_OPCODE_FTOS: + return "ftos"; + case BRIG_OPCODE_STOF: + return "stof"; + case BRIG_OPCODE_CMP: + return "cmp"; + case BRIG_OPCODE_CVT: + return "cvt"; + case BRIG_OPCODE_LD: + return "ld"; + case BRIG_OPCODE_ST: + return "st"; + case BRIG_OPCODE_ATOMIC: + return "atomic"; + case BRIG_OPCODE_ATOMICNORET: + return "atomicnoret"; + case BRIG_OPCODE_SIGNAL: + return "signal"; + case BRIG_OPCODE_SIGNALNORET: + return "signalnoret"; + case BRIG_OPCODE_MEMFENCE: + return "memfence"; + 
case BRIG_OPCODE_RDIMAGE: + return "rdimage"; + case BRIG_OPCODE_LDIMAGE: + return "ldimage"; + case BRIG_OPCODE_STIMAGE: + return "stimage"; + case BRIG_OPCODE_QUERYIMAGE: + return "queryimage"; + case BRIG_OPCODE_QUERYSAMPLER: + return "querysampler"; + case BRIG_OPCODE_CBR: + return "cbr"; + case BRIG_OPCODE_BR: + return "br"; + case BRIG_OPCODE_SBR: + return "sbr"; + case BRIG_OPCODE_BARRIER: + return "barrier"; + case BRIG_OPCODE_WAVEBARRIER: + return "wavebarrier"; + case BRIG_OPCODE_ARRIVEFBAR: + return "arrivefbar"; + case BRIG_OPCODE_INITFBAR: + return "initfbar"; + case BRIG_OPCODE_JOINFBAR: + return "joinfbar"; + case BRIG_OPCODE_LEAVEFBAR: + return "leavefbar"; + case BRIG_OPCODE_RELEASEFBAR: + return "releasefbar"; + case BRIG_OPCODE_WAITFBAR: + return "waitfbar"; + case BRIG_OPCODE_LDF: + return "ldf"; + case BRIG_OPCODE_ACTIVELANECOUNT: + return "activelanecount"; + case BRIG_OPCODE_ACTIVELANEID: + return "activelaneid"; + case BRIG_OPCODE_ACTIVELANEMASK: + return "activelanemask"; + case BRIG_OPCODE_CALL: + return "call"; + case BRIG_OPCODE_SCALL: + return "scall"; + case BRIG_OPCODE_ICALL: + return "icall"; + case BRIG_OPCODE_RET: + return "ret"; + case BRIG_OPCODE_ALLOCA: + return "alloca"; + case BRIG_OPCODE_CURRENTWORKGROUPSIZE: + return "currentworkgroupsize"; + case BRIG_OPCODE_DIM: + return "dim"; + case BRIG_OPCODE_GRIDGROUPS: + return "gridgroups"; + case BRIG_OPCODE_GRIDSIZE: + return "gridsize"; + case BRIG_OPCODE_PACKETCOMPLETIONSIG: + return "packetcompletionsig"; + case BRIG_OPCODE_PACKETID: + return "packetid"; + case BRIG_OPCODE_WORKGROUPID: + return "workgroupid"; + case BRIG_OPCODE_WORKGROUPSIZE: + return "workgroupsize"; + case BRIG_OPCODE_WORKITEMABSID: + return "workitemabsid"; + case BRIG_OPCODE_WORKITEMFLATABSID: + return "workitemflatabsid"; + case BRIG_OPCODE_WORKITEMFLATID: + return "workitemflatid"; + case BRIG_OPCODE_WORKITEMID: + return "workitemid"; + case BRIG_OPCODE_CLEARDETECTEXCEPT: + return "cleardetectexcept"; + case BRIG_OPCODE_GETDETECTEXCEPT: + return "getdetectexcept"; + case BRIG_OPCODE_SETDETECTEXCEPT: + return "setdetectexcept"; + case BRIG_OPCODE_ADDQUEUEWRITEINDEX: + return "addqueuewriteindex"; + case BRIG_OPCODE_CASQUEUEWRITEINDEX: + return "casqueuewriteindex"; + case BRIG_OPCODE_LDQUEUEREADINDEX: + return "ldqueuereadindex"; + case BRIG_OPCODE_LDQUEUEWRITEINDEX: + return "ldqueuewriteindex"; + case BRIG_OPCODE_STQUEUEREADINDEX: + return "stqueuereadindex"; + case BRIG_OPCODE_STQUEUEWRITEINDEX: + return "stqueuewriteindex"; + case BRIG_OPCODE_CLOCK: + return "clock"; + case BRIG_OPCODE_CUID: + return "cuid"; + case BRIG_OPCODE_DEBUGTRAP: + return "debugtrap"; + case BRIG_OPCODE_GROUPBASEPTR: + return "groupbaseptr"; + case BRIG_OPCODE_KERNARGBASEPTR: + return "kernargbaseptr"; + case BRIG_OPCODE_LANEID: + return "laneid"; + case BRIG_OPCODE_MAXCUID: + return "maxcuid"; + case BRIG_OPCODE_MAXWAVEID: + return "maxwaveid"; + case BRIG_OPCODE_NULLPTR: + return "nullptr"; + case BRIG_OPCODE_WAVEID: + return "waveid"; + default: + return "UNKNOWN_OPCODE"; + } +} + +/* Return textual name of SEG. 
*/ + +const char * +hsa_seg_name (BrigSegment8_t seg) +{ + switch (seg) + { + case BRIG_SEGMENT_NONE: + return "none"; + case BRIG_SEGMENT_FLAT: + return "flat"; + case BRIG_SEGMENT_GLOBAL: + return "global"; + case BRIG_SEGMENT_READONLY: + return "readonly"; + case BRIG_SEGMENT_KERNARG: + return "kernarg"; + case BRIG_SEGMENT_GROUP: + return "group"; + case BRIG_SEGMENT_PRIVATE: + return "private"; + case BRIG_SEGMENT_SPILL: + return "spill"; + case BRIG_SEGMENT_ARG: + return "arg"; + default: + return "UNKNOWN_SEGMENT"; + } +} + +/* Return textual name of CMPOP. */ + +static const char * +hsa_cmpop_name (BrigCompareOperation8_t cmpop) +{ + switch (cmpop) + { + case BRIG_COMPARE_EQ: + return "eq"; + case BRIG_COMPARE_NE: + return "ne"; + case BRIG_COMPARE_LT: + return "lt"; + case BRIG_COMPARE_LE: + return "le"; + case BRIG_COMPARE_GT: + return "gt"; + case BRIG_COMPARE_GE: + return "ge"; + case BRIG_COMPARE_EQU: + return "equ"; + case BRIG_COMPARE_NEU: + return "neu"; + case BRIG_COMPARE_LTU: + return "ltu"; + case BRIG_COMPARE_LEU: + return "leu"; + case BRIG_COMPARE_GTU: + return "gtu"; + case BRIG_COMPARE_GEU: + return "geu"; + case BRIG_COMPARE_NUM: + return "num"; + case BRIG_COMPARE_NAN: + return "nan"; + case BRIG_COMPARE_SEQ: + return "seq"; + case BRIG_COMPARE_SNE: + return "sne"; + case BRIG_COMPARE_SLT: + return "slt"; + case BRIG_COMPARE_SLE: + return "sle"; + case BRIG_COMPARE_SGT: + return "sgt"; + case BRIG_COMPARE_SGE: + return "sge"; + case BRIG_COMPARE_SGEU: + return "sgeu"; + case BRIG_COMPARE_SEQU: + return "sequ"; + case BRIG_COMPARE_SNEU: + return "sneu"; + case BRIG_COMPARE_SLTU: + return "sltu"; + case BRIG_COMPARE_SLEU: + return "sleu"; + case BRIG_COMPARE_SNUM: + return "snum"; + case BRIG_COMPARE_SNAN: + return "snan"; + case BRIG_COMPARE_SGTU: + return "sgtu"; + default: + return "UNKNOWN_COMPARISON"; + } +} + +/* Return textual name for memory order. */ + +static const char * +hsa_memsem_name (enum BrigMemoryOrder mo) +{ + switch (mo) + { + case BRIG_MEMORY_ORDER_NONE: + return ""; + case BRIG_MEMORY_ORDER_RELAXED: + return "rlx"; + case BRIG_MEMORY_ORDER_SC_ACQUIRE: + return "scacq"; + case BRIG_MEMORY_ORDER_SC_RELEASE: + return "screl"; + case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: + return "scar"; + default: + return "UNKNOWN_MEMORY_ORDER"; + } +} + +/* Return textual name for memory scope. */ + +static const char * +hsa_memscope_name (enum BrigMemoryScope scope) +{ + switch (scope) + { + case BRIG_MEMORY_SCOPE_NONE: + return ""; + case BRIG_MEMORY_SCOPE_WORKITEM: + return "wi"; + case BRIG_MEMORY_SCOPE_WAVEFRONT: + return "wave"; + case BRIG_MEMORY_SCOPE_WORKGROUP: + return "wg"; + case BRIG_MEMORY_SCOPE_AGENT: + return "agent"; + case BRIG_MEMORY_SCOPE_SYSTEM: + return "sys"; + default: + return "UNKNOWN_SCOPE"; + } +} + +/* Return textual name for atomic operation. 
*/ + +static const char * +hsa_m_atomicop_name (enum BrigAtomicOperation op) +{ + switch (op) + { + case BRIG_ATOMIC_ADD: + return "add"; + case BRIG_ATOMIC_AND: + return "and"; + case BRIG_ATOMIC_CAS: + return "cas"; + case BRIG_ATOMIC_EXCH: + return "exch"; + case BRIG_ATOMIC_LD: + return "ld"; + case BRIG_ATOMIC_MAX: + return "max"; + case BRIG_ATOMIC_MIN: + return "min"; + case BRIG_ATOMIC_OR: + return "or"; + case BRIG_ATOMIC_ST: + return "st"; + case BRIG_ATOMIC_SUB: + return "sub"; + case BRIG_ATOMIC_WRAPDEC: + return "wrapdec"; + case BRIG_ATOMIC_WRAPINC: + return "wrapinc"; + case BRIG_ATOMIC_XOR: + return "xor"; + case BRIG_ATOMIC_WAIT_EQ: + return "wait_eq"; + case BRIG_ATOMIC_WAIT_NE: + return "wait_ne"; + case BRIG_ATOMIC_WAIT_LT: + return "wait_lt"; + case BRIG_ATOMIC_WAIT_GTE: + return "wait_gte"; + case BRIG_ATOMIC_WAITTIMEOUT_EQ: + return "waittimeout_eq"; + case BRIG_ATOMIC_WAITTIMEOUT_NE: + return "waittimeout_ne"; + case BRIG_ATOMIC_WAITTIMEOUT_LT: + return "waittimeout_lt"; + case BRIG_ATOMIC_WAITTIMEOUT_GTE: + return "waittimeout_gte"; + default: + return "UNKNOWN_ATOMIC_OP"; + } +} + +/* Return byte alignment for given BrigAlignment8_t value. */ + +static unsigned +hsa_byte_alignment (BrigAlignment8_t alignment) +{ + gcc_assert (alignment != BRIG_ALIGNMENT_NONE); + + return 1 << (alignment - 1); +} + +/* Dump textual representation of HSA IL register REG to file F. */ + +static void +dump_hsa_reg (FILE *f, hsa_op_reg *reg, bool dump_type = false) +{ + if (reg->m_reg_class) + fprintf (f, "$%c%i", reg->m_reg_class, reg->m_hard_num); + else + fprintf (f, "$_%i", reg->m_order); + if (dump_type) + fprintf (f, " (%s)", hsa_type_name (reg->m_type)); +} + +/* Dump textual representation of HSA IL immediate operand IMM to file F. */ + +static void +dump_hsa_immed (FILE *f, hsa_op_immed *imm) +{ + /* BRIG type values are sequential integers, not bit flags, so test for + the unsigned types with equality comparisons. */ + bool unsigned_int_type + = (imm->m_type == BRIG_TYPE_U8 || imm->m_type == BRIG_TYPE_U16 + || imm->m_type == BRIG_TYPE_U32 || imm->m_type == BRIG_TYPE_U64); + + if (imm->m_tree_value) + print_generic_expr (f, imm->m_tree_value, 0); + else + { + gcc_checking_assert (imm->m_brig_repr_size <= 8); + + if (unsigned_int_type) + fprintf (f, HOST_WIDE_INT_PRINT_UNSIGNED, + (unsigned HOST_WIDE_INT) imm->m_int_value); + else + fprintf (f, HOST_WIDE_INT_PRINT_DEC, imm->m_int_value); + } + + fprintf (f, " (%s)", hsa_type_name (imm->m_type)); +} + +/* Dump textual representation of HSA IL address operand ADDR to file F. */ + +static void +dump_hsa_address (FILE *f, hsa_op_address *addr) +{ + bool sth = false; + + if (addr->m_symbol) + { + sth = true; + if (addr->m_symbol->m_name) + fprintf (f, "[%%%s]", addr->m_symbol->m_name); + else + fprintf (f, "[%%__%s_%i]", hsa_seg_name (addr->m_symbol->m_segment), + addr->m_symbol->m_name_number); + } + + if (addr->m_reg) + { + fprintf (f, "["); + dump_hsa_reg (f, addr->m_reg); + if (addr->m_imm_offset != 0) + fprintf (f, " + " HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset); + else + fprintf (f, "]"); + } + else if (!sth || addr->m_imm_offset != 0) + fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset); +} + +/* Dump textual representation of HSA IL symbol SYMBOL to file F.
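+   Judging from the format strings in the function body, a named global
+   u32 array symbol dumps for instance as
+
+     global_u32 foo[16]
+
+   and an unnamed spill symbol falls back to a synthesized name such as
+
+     spill_u64 __spill_3
+
+   (editorial examples, not output captured from a real run).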
*/ + +static void +dump_hsa_symbol (FILE *f, hsa_symbol *symbol) +{ + const char *name; + /* Buffer lives at function scope so NAME cannot end up pointing to a + dead block-local array. */ + char buf[64]; + if (symbol->m_name) + name = symbol->m_name; + else + { + sprintf (buf, "__%s_%i", hsa_seg_name (symbol->m_segment), + symbol->m_name_number); + + name = buf; + } + + fprintf (f, "%s_%s %s", hsa_seg_name (symbol->m_segment), + hsa_type_name (symbol->m_type & ~BRIG_TYPE_ARRAY_MASK), name); + + if (symbol->m_type & BRIG_TYPE_ARRAY_MASK) + fprintf (f, "[%lu]", (unsigned long) symbol->m_dim); +} + +/* Dump textual representation of HSA IL operand OP to file F. */ + +static void +dump_hsa_operand (FILE *f, hsa_op_base *op, bool dump_reg_type = false) +{ + if (is_a <hsa_op_immed *> (op)) + dump_hsa_immed (f, as_a <hsa_op_immed *> (op)); + else if (is_a <hsa_op_reg *> (op)) + dump_hsa_reg (f, as_a <hsa_op_reg *> (op), dump_reg_type); + else if (is_a <hsa_op_address *> (op)) + dump_hsa_address (f, as_a <hsa_op_address *> (op)); + else + fprintf (f, "UNKNOWN_OP_KIND"); +} + +/* Dump textual representation of the operands of INSN to file F. */ + +static void +dump_hsa_operands (FILE *f, hsa_insn_basic *insn, int start = 0, + int end = -1, bool dump_reg_type = false) +{ + if (end == -1) + end = insn->operand_count (); + + for (int i = start; i < end; i++) + { + dump_hsa_operand (f, insn->get_op (i), dump_reg_type); + if (i != end - 1) + fprintf (f, ", "); + } +} + +/* Indent F stream with INDENT spaces. */ + +static void +indent_stream (FILE *f, int indent) +{ + for (int i = 0; i < indent; i++) + fputc (' ', f); +} + +/* Dump textual representation of HSA IL instruction INSN to file F. Prepend + the instruction with *INDENT spaces and adjust the indentation for call + instructions as appropriate. */ + +static void +dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) +{ + gcc_checking_assert (insn); + + if (insn->m_number) + fprintf (f, "%5d: ", insn->m_number); + + indent_stream (f, *indent); + + if (is_a <hsa_insn_phi *> (insn)) + { + hsa_insn_phi *phi = as_a <hsa_insn_phi *> (insn); + bool first = true; + dump_hsa_reg (f, phi->m_dest, true); + fprintf (f, " = PHI <"); + unsigned count = phi->operand_count (); + for (unsigned i = 0; i < count; i++) + { + if (!phi->get_op (i)) + break; + if (!first) + fprintf (f, ", "); + else + first = false; + dump_hsa_operand (f, phi->get_op (i), true); + } + fprintf (f, ">"); + } + else if (is_a <hsa_insn_signal *> (insn)) + { + hsa_insn_signal *mem = as_a <hsa_insn_signal *> (insn); + + fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); + fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_atomicop)); + if (mem->m_memoryorder != BRIG_MEMORY_ORDER_NONE) + fprintf (f, "_%s", hsa_memsem_name (mem->m_memoryorder)); + fprintf (f, "_%s ", hsa_type_name (mem->m_type)); + + dump_hsa_operands (f, mem); + } + + else if (is_a <hsa_insn_atomic *> (insn)) + { + hsa_insn_atomic *mem = as_a <hsa_insn_atomic *> (insn); + + /* Either operand[0] or operand[1] must be an address operand.
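+   (Editorial illustration: a value-returning atomic is dumped with its
+   destination register in operand 0 and the address in operand 1, roughly
+
+     atomic_add_global_scacq_u32 $_7, [%sym], 1
+
+   while the no-return atomicnoret form starts directly with the address,
+
+     atomicnoret_add_global_u32 [%sym], 1
+
+   which is why both operand positions have to be tried here.)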
*/ + hsa_op_address *addr = NULL; + if (is_a <hsa_op_address *> (mem->get_op (0))) + addr = as_a <hsa_op_address *> (mem->get_op (0)); + else + addr = as_a <hsa_op_address *> (mem->get_op (1)); + + fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); + fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_atomicop)); + if (addr->m_symbol) + fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); + if (mem->m_memoryorder != BRIG_MEMORY_ORDER_NONE) + fprintf (f, "_%s", hsa_memsem_name (mem->m_memoryorder)); + if (mem->m_memoryscope != BRIG_MEMORY_SCOPE_NONE) + fprintf (f, "_%s", hsa_memscope_name (mem->m_memoryscope)); + fprintf (f, "_%s ", hsa_type_name (mem->m_type)); + + dump_hsa_operands (f, mem); + } + else if (is_a <hsa_insn_mem *> (insn)) + { + hsa_insn_mem *mem = as_a <hsa_insn_mem *> (insn); + hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); + + fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); + if (addr->m_symbol) + fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); + if (mem->m_align != BRIG_ALIGNMENT_NONE) + fprintf (f, "_align(%u)", hsa_byte_alignment (mem->m_align)); + if (mem->m_equiv_class != 0) + fprintf (f, "_equiv(%i)", mem->m_equiv_class); + fprintf (f, "_%s ", hsa_type_name (mem->m_type)); + + dump_hsa_operand (f, mem->get_op (0)); + fprintf (f, ", "); + dump_hsa_address (f, addr); + } + else if (insn->m_opcode == BRIG_OPCODE_LDA) + { + hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); + + fprintf (f, "%s", hsa_opcode_name (insn->m_opcode)); + if (addr->m_symbol) + fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); + fprintf (f, "_%s ", hsa_type_name (insn->m_type)); + + dump_hsa_operand (f, insn->get_op (0)); + fprintf (f, ", "); + dump_hsa_address (f, addr); + } + else if (is_a <hsa_insn_seg *> (insn)) + { + hsa_insn_seg *seg = as_a <hsa_insn_seg *> (insn); + fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (seg->m_opcode), + hsa_seg_name (seg->m_segment), + hsa_type_name (seg->m_type), hsa_type_name (seg->m_src_type)); + dump_hsa_reg (f, as_a <hsa_op_reg *> (seg->get_op (0))); + fprintf (f, ", "); + dump_hsa_operand (f, seg->get_op (1)); + } + else if (is_a <hsa_insn_cmp *> (insn)) + { + hsa_insn_cmp *cmp = as_a <hsa_insn_cmp *> (insn); + BrigType16_t src_type; + + if (is_a <hsa_op_reg *> (cmp->get_op (1))) + src_type = as_a <hsa_op_reg *> (cmp->get_op (1))->m_type; + else + src_type = as_a <hsa_op_immed *> (cmp->get_op (1))->m_type; + + fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (cmp->m_opcode), + hsa_cmpop_name (cmp->m_compare), + hsa_type_name (cmp->m_type), hsa_type_name (src_type)); + dump_hsa_reg (f, as_a <hsa_op_reg *> (cmp->get_op (0))); + fprintf (f, ", "); + dump_hsa_operand (f, cmp->get_op (1)); + fprintf (f, ", "); + dump_hsa_operand (f, cmp->get_op (2)); + } + else if (is_a <hsa_insn_br *> (insn)) + { + hsa_insn_br *br = as_a <hsa_insn_br *> (insn); + basic_block target = NULL; + edge_iterator ei; + edge e; + + fprintf (f, "%s ", hsa_opcode_name (br->m_opcode)); + if (br->m_opcode == BRIG_OPCODE_CBR) + { + dump_hsa_reg (f, as_a <hsa_op_reg *> (br->get_op (0))); + fprintf (f, ", "); + } + + FOR_EACH_EDGE (e, ei, br->m_bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + target = e->dest; + break; + } + fprintf (f, "BB %i", hsa_bb_for_bb (target)->m_index); + } + else if (is_a <hsa_insn_sbr *> (insn)) + { + hsa_insn_sbr *sbr = as_a <hsa_insn_sbr *> (insn); + + fprintf (f, "%s ", hsa_opcode_name (sbr->m_opcode)); + dump_hsa_reg (f, as_a <hsa_op_reg *> (sbr->get_op (0))); + fprintf (f, ", ["); + + for 
(unsigned i = 0; i < sbr->m_jump_table.length (); i++) + { + fprintf (f, "BB %i", hsa_bb_for_bb (sbr->m_jump_table[i])->m_index); + if (i != sbr->m_jump_table.length () - 1) + fprintf (f, ", "); + } + + fprintf (f, "]"); + } + else if (is_a <hsa_insn_arg_block *> (insn)) + { + hsa_insn_arg_block *arg_block = as_a <hsa_insn_arg_block *> (insn); + bool start_p = arg_block->m_kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_START; + char c = start_p ? '{' : '}'; + + if (start_p) + { + *indent += 2; + indent_stream (f, 2); + } + + if (!start_p) + *indent -= 2; + + fprintf (f, "%c", c); + } + else if (is_a <hsa_insn_call *> (insn)) + { + hsa_insn_call *call = as_a <hsa_insn_call *> (insn); + if (call->m_called_function) + { + const char *name = hsa_get_declaration_name (call->m_called_function); + fprintf (f, "call &%s", name); + } + else + { + char *name = call->m_called_internal_fn->name (); + fprintf (f, "call &%s", name); + free (name); + } + + if (call->m_output_arg) + fprintf (f, "(%%res) "); + + fprintf (f, "("); + for (unsigned i = 0; i < call->m_input_args.length (); i++) + { + fprintf (f, "%%__arg_%u", i); + + if (i != call->m_input_args.length () - 1) + fprintf (f, ", "); + } + fprintf (f, ")"); + } + else if (is_a <hsa_insn_comment *> (insn)) + { + hsa_insn_comment *c = as_a <hsa_insn_comment *> (insn); + fprintf (f, "%s", c->m_comment); + } + else if (is_a <hsa_insn_srctype *> (insn)) + { + hsa_insn_srctype *srctype = as_a <hsa_insn_srctype *> (insn); + + fprintf (f, "%s_%s_%s ", hsa_opcode_name (srctype->m_opcode), + hsa_type_name (srctype->m_type), + hsa_type_name (srctype->m_source_type)); + + dump_hsa_operands (f, insn); + } + else if (is_a <hsa_insn_packed *> (insn)) + { + hsa_insn_packed *packed = as_a <hsa_insn_packed *> (insn); + + fprintf (f, "%s_v%u_%s_%s ", hsa_opcode_name (packed->m_opcode), + packed->operand_count () - 1, + hsa_type_name (packed->m_type), + hsa_type_name (packed->m_source_type)); + + if (packed->m_opcode == BRIG_OPCODE_COMBINE) + { + dump_hsa_operand (f, insn->get_op (0)); + fprintf (f, ", ("); + dump_hsa_operands (f, insn, 1); + fprintf (f, ")"); + } + else if (packed->m_opcode == BRIG_OPCODE_EXPAND) + { + fprintf (f, "("); + dump_hsa_operands (f, insn, 0, insn->operand_count () - 1); + fprintf (f, "), "); + dump_hsa_operand (f, insn->get_op (insn->operand_count () - 1)); + + } + else + gcc_unreachable (); + } + else if (is_a <hsa_insn_alloca *> (insn)) + { + hsa_insn_alloca *alloca = as_a <hsa_insn_alloca *> (insn); + + fprintf (f, "%s_align(%u)_%s ", hsa_opcode_name (insn->m_opcode), + hsa_byte_alignment (alloca->m_align), + hsa_type_name (insn->m_type)); + + dump_hsa_operands (f, insn); + } + else + { + fprintf (f, "%s_%s ", hsa_opcode_name (insn->m_opcode), + hsa_type_name (insn->m_type)); + + dump_hsa_operands (f, insn); + } + + if (insn->m_brig_offset) + { + fprintf (f, " /* BRIG offset: %u", insn->m_brig_offset); + + for (unsigned i = 0; i < insn->operand_count (); i++) + fprintf (f, ", op%u: %u", i, insn->get_op (i)->m_brig_op_offset); + + fprintf (f, " */"); + } + + fprintf (f, "\n"); +} + +/* Dump textual representation of HSA IL instruction INSN to file F. */ + +void +dump_hsa_insn (FILE *f, hsa_insn_basic *insn) +{ + int indent = 0; + dump_hsa_insn_1 (f, insn, &indent); +} + +/* Dump textual representation of HSA IL in HBB to file F. 
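+   The output for one block looks roughly like this (editorial example):
+
+     BB 4:
+       $_4 (u32) = PHI <$_2 (u32), $_3 (u32)>
+       add_u32 $_5, $_4, 1
+      Fall-through to BB 5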
*/ + +void +dump_hsa_bb (FILE *f, hsa_bb *hbb) +{ + hsa_insn_basic *insn; + edge_iterator ei; + edge e; + basic_block true_bb = NULL, other = NULL; + + fprintf (f, "BB %i:\n", hbb->m_index); + + int indent = 2; + for (insn = hbb->m_first_phi; insn; insn = insn->m_next) + dump_hsa_insn_1 (f, insn, &indent); + + for (insn = hbb->m_first_insn; insn; insn = insn->m_next) + dump_hsa_insn_1 (f, insn, &indent); + + if (hbb->m_last_insn && is_a <hsa_insn_sbr *> (hbb->m_last_insn)) + goto exit; + + FOR_EACH_EDGE (e, ei, hbb->m_bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + gcc_assert (!true_bb); + true_bb = e->dest; + } + else + { + gcc_assert (!other); + other = e->dest; + } + + if (true_bb) + { + if (!hbb->m_last_insn + || hbb->m_last_insn->m_opcode != BRIG_OPCODE_CBR) + fprintf (f, "WARNING: No branch insn for a true edge.\n"); + } + else if (hbb->m_last_insn + && hbb->m_last_insn->m_opcode == BRIG_OPCODE_CBR) + fprintf (f, "WARNING: No true edge for a cbr statement\n"); + + if (other && other->aux) + fprintf (f, " Fall-through to BB %i\n", + hsa_bb_for_bb (other)->m_index); + else if (hbb->m_last_insn + && hbb->m_last_insn->m_opcode != BRIG_OPCODE_RET) + fprintf (f, " WARNING: Fall through to a BB with no aux!\n"); + +exit: + fprintf (f, "\n"); +} + +/* Dump textual representation of HSA IL of the current function to file F. */ + +void +dump_hsa_cfun (FILE *f) +{ + basic_block bb; + + if (hsa_cfun->m_global_symbols.length () > 0) + fprintf (f, "\nHSAIL in global scope\n"); + + for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++) + { + fprintf (f, " "); + dump_hsa_symbol (f, hsa_cfun->m_global_symbols[i]); + fprintf (f, "\n"); + } + + fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name); + + for (unsigned i = 0; i < hsa_cfun->m_private_variables.length (); i++) + { + fprintf (f, " "); + dump_hsa_symbol (f, hsa_cfun->m_private_variables[i]); + fprintf (f, "\n"); + } + + FOR_ALL_BB_FN (bb, cfun) + { + hsa_bb *hbb = (struct hsa_bb *) bb->aux; + dump_hsa_bb (f, hbb); + } +} + +/* Dump textual representation of HSA IL instruction INSN to stderr. */ + +DEBUG_FUNCTION void +debug_hsa_insn (hsa_insn_basic *insn) +{ + dump_hsa_insn (stderr, insn); +} + +/* Dump textual representation of HSA IL in HBB to stderr. */ + +DEBUG_FUNCTION void +debug_hsa_bb (hsa_bb *hbb) +{ + dump_hsa_bb (stderr, hbb); +} + +/* Dump textual representation of HSA IL of the current function to stderr. */ + +DEBUG_FUNCTION void +debug_hsa_cfun (void) +{ + dump_hsa_cfun (stderr); +} + +/* Dump textual representation of an HSA operand to stderr. */ + +DEBUG_FUNCTION void +debug_hsa_operand (hsa_op_base *opc) +{ + dump_hsa_operand (stderr, opc, true); + fprintf (stderr, "\n"); +} + +/* Dump textual representation of an HSA symbol. */ + +DEBUG_FUNCTION void +debug_hsa_symbol (hsa_symbol *symbol) +{ + dump_hsa_symbol (stderr, symbol); + fprintf (stderr, "\n"); +} diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c new file mode 100644 index 0000000..a6f4170 --- /dev/null +++ b/gcc/hsa-gen.c @@ -0,0 +1,6151 @@ +/* A pass for lowering gimple to HSAIL + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version.
+ +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "is-a.h" +#include "hash-table.h" +#include "vec.h" +#include "tree.h" +#include "tree-pass.h" +#include "cfg.h" +#include "function.h" +#include "basic-block.h" +#include "fold-const.h" +#include "gimple.h" +#include "gimple-iterator.h" +#include "bitmap.h" +#include "dumpfile.h" +#include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "alloc-pool.h" +#include "gimple-ssa.h" +#include "tree-phinodes.h" +#include "stringpool.h" +#include "tree-ssanames.h" +#include "tree-dfa.h" +#include "ssa-iterators.h" +#include "cgraph.h" +#include "print-tree.h" +#include "symbol-summary.h" +#include "hsa.h" +#include "cfghooks.h" +#include "tree-cfg.h" +#include "cfgloop.h" +#include "cfganal.h" +#include "builtins.h" +#include "params.h" +#include "gomp-constants.h" +#include "internal-fn.h" +#include "stor-layout.h" + +/* Print a warning message and note that we have seen an error. */ + +#define HSA_SORRY_ATV(location, message, ...) \ + do \ + { \ + hsa_fail_cfun (); \ + if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \ + HSA_SORRY_MSG)) \ + inform (location, message, __VA_ARGS__); \ + } \ + while (false) + +/* Same as previous, but without variadic message arguments. */ + +#define HSA_SORRY_AT(location, message) \ + do \ + { \ + hsa_fail_cfun (); \ + if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \ + HSA_SORRY_MSG)) \ + inform (location, message); \ + } \ + while (false) + +/* Default number of threads used by kernel dispatch. */ + +#define HSA_DEFAULT_NUM_THREADS 64 + +/* The following structures are defined in the final version + of the HSA specification. */ + +/* An HSA queue packet is a shadow structure, originally provided by AMD. */ + +struct hsa_queue_packet +{ + uint16_t header; + uint16_t setup; + uint16_t workgroup_size_x; + uint16_t workgroup_size_y; + uint16_t workgroup_size_z; + uint16_t reserved0; + uint32_t grid_size_x; + uint32_t grid_size_y; + uint32_t grid_size_z; + uint32_t private_segment_size; + uint32_t group_segment_size; + uint64_t kernel_object; + void *kernarg_address; + uint64_t reserved2; + uint64_t completion_signal; +}; + +/* An HSA queue is a shadow structure, originally provided by AMD. */ + +struct hsa_queue +{ + int type; + uint32_t features; + void *base_address; + uint64_t doorbell_signal; + uint32_t size; + uint32_t reserved1; + uint64_t id; +}; + +/* Alloc pools for allocating basic hsa structures such as operands, + instructions and other basic entities.
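+
+   Editorial sketch, not part of the original patch: the expectation is
+   that each IR class routes its operator new through the matching pool,
+   along the lines of
+
+     void *
+     hsa_op_reg::operator new (size_t)
+     {
+       return hsa_allocp_operand_reg->allocate_raw ();  // method name is
+                                                        // an assumption
+     }
+
+   so that hsa_deinit_data_for_cfun below can destroy every recorded
+   instruction and operand and then delete the pools wholesale.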
*/ +static object_allocator<hsa_op_address> *hsa_allocp_operand_address; +static object_allocator<hsa_op_immed> *hsa_allocp_operand_immed; +static object_allocator<hsa_op_reg> *hsa_allocp_operand_reg; +static object_allocator<hsa_op_code_list> *hsa_allocp_operand_code_list; +static object_allocator<hsa_op_operand_list> *hsa_allocp_operand_operand_list; +static object_allocator<hsa_insn_basic> *hsa_allocp_inst_basic; +static object_allocator<hsa_insn_phi> *hsa_allocp_inst_phi; +static object_allocator<hsa_insn_mem> *hsa_allocp_inst_mem; +static object_allocator<hsa_insn_atomic> *hsa_allocp_inst_atomic; +static object_allocator<hsa_insn_signal> *hsa_allocp_inst_signal; +static object_allocator<hsa_insn_seg> *hsa_allocp_inst_seg; +static object_allocator<hsa_insn_cmp> *hsa_allocp_inst_cmp; +static object_allocator<hsa_insn_br> *hsa_allocp_inst_br; +static object_allocator<hsa_insn_sbr> *hsa_allocp_inst_sbr; +static object_allocator<hsa_insn_call> *hsa_allocp_inst_call; +static object_allocator<hsa_insn_arg_block> *hsa_allocp_inst_arg_block; +static object_allocator<hsa_insn_comment> *hsa_allocp_inst_comment; +static object_allocator<hsa_insn_queue> *hsa_allocp_inst_queue; +static object_allocator<hsa_insn_srctype> *hsa_allocp_inst_srctype; +static object_allocator<hsa_insn_packed> *hsa_allocp_inst_packed; +static object_allocator<hsa_insn_cvt> *hsa_allocp_inst_cvt; +static object_allocator<hsa_insn_alloca> *hsa_allocp_inst_alloca; +static object_allocator<hsa_bb> *hsa_allocp_bb; + +/* List of pointers to all instructions that come from an object allocator. */ +static vec <hsa_insn_basic *> hsa_instructions; + +/* List of pointers to all operands that come from an object allocator. */ +static vec <hsa_op_base *> hsa_operands; + +hsa_symbol::hsa_symbol () + : m_decl (NULL_TREE), m_name (NULL), m_name_number (0), + m_directive_offset (0), m_type (BRIG_TYPE_NONE), + m_segment (BRIG_SEGMENT_NONE), m_linkage (BRIG_LINKAGE_NONE), m_dim (0), + m_cst_value (NULL), m_global_scope_p (false), m_seen_error (false), + m_allocation (BRIG_ALLOCATION_AUTOMATIC) +{ +} + + +hsa_symbol::hsa_symbol (BrigType16_t type, BrigSegment8_t segment, + BrigLinkage8_t linkage, bool global_scope_p, + BrigAllocation allocation) + : m_decl (NULL_TREE), m_name (NULL), m_name_number (0), + m_directive_offset (0), m_type (type), m_segment (segment), + m_linkage (linkage), m_dim (0), m_cst_value (NULL), + m_global_scope_p (global_scope_p), m_seen_error (false), + m_allocation (allocation) +{ +} + +unsigned HOST_WIDE_INT +hsa_symbol::total_byte_size () +{ + unsigned HOST_WIDE_INT s + = hsa_type_bit_size (~BRIG_TYPE_ARRAY_MASK & m_type); + gcc_assert (s % BITS_PER_UNIT == 0); + s /= BITS_PER_UNIT; + + if (m_dim) + s *= m_dim; + + return s; +} + +/* Forward declaration. */ + +static BrigType16_t +hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p, + bool min32int); + +void +hsa_symbol::fillup_for_decl (tree decl) +{ + m_decl = decl; + m_type = hsa_type_for_tree_type (TREE_TYPE (decl), &m_dim, false); + + if (hsa_seen_error ()) + m_seen_error = true; +} + +/* Constructor of class representing global HSA function/kernel information and + state. FDECL is the function declaration, KERNEL_P is true if the function + is going to become an HSA kernel. If the function has a body, SSA_NAMES_COUNT + should be set to the number of SSA names used in the function.
*/ + +hsa_function_representation::hsa_function_representation + (tree fdecl, bool kernel_p, unsigned ssa_names_count) + : m_name (NULL), + m_reg_count (0), m_input_args (vNULL), + m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL), + m_private_variables (vNULL), m_called_functions (vNULL), + m_called_internal_fns (vNULL), m_hbb_count (0), + m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false), + m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL), + m_kernel_dispatch_count (0), m_maximum_omp_data_size (0), + m_seen_error (false), m_temp_symbol_count (0), m_ssa_map () +{ + int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1; + m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len); + m_ssa_map.safe_grow_cleared (ssa_names_count); +} + +/* Constructor of class representing HSA function information that + is derived for an internal function. */ +hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn) + : m_reg_count (0), m_input_args (vNULL), + m_output_arg (NULL), m_local_symbols (NULL), + m_spill_symbols (vNULL), m_global_symbols (vNULL), + m_private_variables (vNULL), m_called_functions (vNULL), + m_called_internal_fns (vNULL), m_hbb_count (0), + m_in_ssa (true), m_kern_p (false), m_declaration_p (true), m_decl (NULL), + m_internal_fn (fn), m_shadow_reg (NULL), m_kernel_dispatch_count (0), + m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0), + m_ssa_map () {} + +/* Destructor of class holding function/kernel-wide information and state. */ + +hsa_function_representation::~hsa_function_representation () +{ + /* Kernel names are deallocated at the end of BRIG output when deallocating + hsa_decl_kernel_mapping. */ + if (!m_kern_p || m_seen_error) + free (m_name); + + for (unsigned i = 0; i < m_input_args.length (); i++) + delete m_input_args[i]; + m_input_args.release (); + + delete m_output_arg; + delete m_local_symbols; + + for (unsigned i = 0; i < m_spill_symbols.length (); i++) + delete m_spill_symbols[i]; + m_spill_symbols.release (); + + hsa_symbol *sym; + for (unsigned i = 0; m_global_symbols.iterate (i, &sym); i++) + if (sym->m_allocation != BRIG_ALLOCATION_PROGRAM) + delete sym; + m_global_symbols.release (); + + for (unsigned i = 0; i < m_private_variables.length (); i++) + delete m_private_variables[i]; + m_private_variables.release (); + m_called_functions.release (); + m_ssa_map.release (); + + for (unsigned i = 0; i < m_called_internal_fns.length (); i++) + delete m_called_internal_fns[i]; +} + +hsa_op_reg * +hsa_function_representation::get_shadow_reg () +{ + /* If we compile a function with kernel dispatch and no optimization + level is set, the function won't be inlined and we return NULL. */ + if (!m_kern_p) + return NULL; + + if (m_shadow_reg) + return m_shadow_reg; + + /* Append the shadow argument.
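+   Illustratively (editorial example, register name schematic), the code
+   below declares a kernarg symbol and loads it once in the entry block,
+   so the kernel's HSAIL begins roughly with
+
+     kernarg_u64 %hsa_runtime_shadow;
+     ld_kernarg_u64 $_1, [%hsa_runtime_shadow];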
*/ + hsa_symbol *shadow = new hsa_symbol (BRIG_TYPE_U64, BRIG_SEGMENT_KERNARG, + BRIG_LINKAGE_FUNCTION); + m_input_args.safe_push (shadow); + shadow->m_name = "hsa_runtime_shadow"; + + hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_U64); + hsa_op_address *addr = new hsa_op_address (shadow); + + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, r, addr); + hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->append_insn (mem); + m_shadow_reg = r; + + return r; +} + +bool +hsa_function_representation::has_shadow_reg_p () +{ + return m_shadow_reg != NULL; +} + +void +hsa_function_representation::init_extra_bbs () +{ + hsa_init_new_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); +} + +hsa_symbol * +hsa_function_representation::create_hsa_temporary (BrigType16_t type) +{ + hsa_symbol *s = new hsa_symbol (type, BRIG_SEGMENT_PRIVATE, + BRIG_LINKAGE_FUNCTION); + s->m_name_number = m_temp_symbol_count++; + + hsa_cfun->m_private_variables.safe_push (s); + return s; +} + +BrigLinkage8_t +hsa_function_representation::get_linkage () +{ + if (m_internal_fn) + return BRIG_LINKAGE_PROGRAM; + + return m_kern_p || TREE_PUBLIC (m_decl) ? + BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE; +} + +/* Hash map of simple OMP builtins. */ +static hash_map <nofree_string_hash, omp_simple_builtin> *omp_simple_builtins + = NULL; + +/* Warning messages for OMP builtins. */ + +#define HSA_WARN_LOCK_ROUTINE "support for HSA does not implement OpenMP " \ + "lock routines" +#define HSA_WARN_TIMING_ROUTINE "support for HSA does not implement OpenMP " \ + "timing routines" +#define HSA_WARN_MEMORY_ROUTINE "OpenMP device memory library routines have " \ + "undefined semantics within target regions, support for HSA ignores them" +#define HSA_WARN_AFFINITY "Support for HSA does not implement OpenMP " \ + "affinity features" + +/* Initialize hash map with simple OMP builtins.
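+   Each entry maps a library function name to a replacement behavior:
+   optionally emit a warning, and optionally substitute a constant return
+   value instead of generating a call. For example, in the table below
+   omp_get_dynamic simply becomes the S32 immediate 0, while
+   omp_set_dynamic expands to nothing at all (no warning, no value).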
*/ + +static void +hsa_init_simple_builtins () +{ + if (omp_simple_builtins != NULL) + return; + + omp_simple_builtins + = new hash_map <nofree_string_hash, omp_simple_builtin> (); + + omp_simple_builtin omp_builtins[] = + { + omp_simple_builtin ("omp_get_initial_device", NULL, false, + new hsa_op_immed (GOMP_DEVICE_HOST, + (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_is_initial_device", NULL, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_dynamic", NULL, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_set_dynamic", NULL, false, NULL), + omp_simple_builtin ("omp_init_lock", HSA_WARN_LOCK_ROUTINE, true), + omp_simple_builtin ("omp_init_lock_with_hint", HSA_WARN_LOCK_ROUTINE, + true), + omp_simple_builtin ("omp_init_nest_lock_with_hint", HSA_WARN_LOCK_ROUTINE, + true), + omp_simple_builtin ("omp_destroy_lock", HSA_WARN_LOCK_ROUTINE, true), + omp_simple_builtin ("omp_set_lock", HSA_WARN_LOCK_ROUTINE, true), + omp_simple_builtin ("omp_unset_lock", HSA_WARN_LOCK_ROUTINE, true), + omp_simple_builtin ("omp_test_lock", HSA_WARN_LOCK_ROUTINE, true), + omp_simple_builtin ("omp_get_wtime", HSA_WARN_TIMING_ROUTINE, true), + omp_simple_builtin ("omp_get_wtick", HSA_WARN_TIMING_ROUTINE, true), + omp_simple_builtin ("omp_target_alloc", HSA_WARN_MEMORY_ROUTINE, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_U64)), + omp_simple_builtin ("omp_target_free", HSA_WARN_MEMORY_ROUTINE, false), + omp_simple_builtin ("omp_target_is_present", HSA_WARN_MEMORY_ROUTINE, + false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_target_memcpy", HSA_WARN_MEMORY_ROUTINE, false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_target_memcpy_rect", HSA_WARN_MEMORY_ROUTINE, + false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_target_associate_ptr", HSA_WARN_MEMORY_ROUTINE, + false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_target_disassociate_ptr", + HSA_WARN_MEMORY_ROUTINE, + false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_set_max_active_levels", + "Support for HSA allows only one active level, " + "a call to omp_set_max_active_levels will be ignored " + "in the generated HSAIL", + false, NULL), + omp_simple_builtin ("omp_get_max_active_levels", NULL, false, + new hsa_op_immed (1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_in_final", NULL, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_proc_bind", HSA_WARN_AFFINITY, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_num_places", HSA_WARN_AFFINITY, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_place_num_procs", HSA_WARN_AFFINITY, false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_place_proc_ids", HSA_WARN_AFFINITY, false, + NULL), + omp_simple_builtin ("omp_get_place_num", HSA_WARN_AFFINITY, false, + new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_partition_num_places", HSA_WARN_AFFINITY, + false, + new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), + omp_simple_builtin ("omp_get_partition_place_nums", HSA_WARN_AFFINITY, + false, NULL), + omp_simple_builtin ("omp_set_default_device", + "omp_set_default_device has undefined semantics " + "within target 
+      omp_simple_builtin ("omp_get_default_device",
+                          "omp_get_default_device has undefined semantics "
+                          "within target regions, support for HSA ignores it",
+                          false,
+                          new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
+      omp_simple_builtin ("omp_get_num_devices",
+                          "omp_get_num_devices has undefined semantics "
+                          "within target regions, support for HSA ignores it",
+                          false,
+                          new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)),
+      omp_simple_builtin ("omp_get_num_procs", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_cancellation", NULL, true, NULL),
+      omp_simple_builtin ("omp_set_nested", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_nested", NULL, true, NULL),
+      omp_simple_builtin ("omp_set_schedule", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_schedule", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_thread_limit", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_team_size", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_ancestor_thread_num", NULL, true, NULL),
+      omp_simple_builtin ("omp_get_max_task_priority", NULL, true, NULL)
+    };
+
+  unsigned count = sizeof (omp_builtins) / sizeof (omp_simple_builtin);
+
+  for (unsigned i = 0; i < count; i++)
+    omp_simple_builtins->put (omp_builtins[i].m_name, omp_builtins[i]);
+}
+
+/* Allocate the HSA structures that we need only while generating code for
+   the current function.  */
+
+static void
+hsa_init_data_for_cfun ()
+{
+  hsa_init_compilation_unit_data ();
+  hsa_allocp_operand_address
+    = new object_allocator<hsa_op_address> ("HSA address operands");
+  hsa_allocp_operand_immed
+    = new object_allocator<hsa_op_immed> ("HSA immediate operands");
+  hsa_allocp_operand_reg
+    = new object_allocator<hsa_op_reg> ("HSA register operands");
+  hsa_allocp_operand_code_list
+    = new object_allocator<hsa_op_code_list> ("HSA code list operands");
+  hsa_allocp_operand_operand_list
+    = new object_allocator<hsa_op_operand_list> ("HSA operand list operands");
+  hsa_allocp_inst_basic
+    = new object_allocator<hsa_insn_basic> ("HSA basic instructions");
+  hsa_allocp_inst_phi
+    = new object_allocator<hsa_insn_phi> ("HSA phi operands");
+  hsa_allocp_inst_mem
+    = new object_allocator<hsa_insn_mem> ("HSA memory instructions");
+  hsa_allocp_inst_atomic
+    = new object_allocator<hsa_insn_atomic> ("HSA atomic instructions");
+  hsa_allocp_inst_signal
+    = new object_allocator<hsa_insn_signal> ("HSA signal instructions");
+  hsa_allocp_inst_seg
+    = new object_allocator<hsa_insn_seg> ("HSA segment conversion "
+                                          "instructions");
+  hsa_allocp_inst_cmp
+    = new object_allocator<hsa_insn_cmp> ("HSA comparison instructions");
+  hsa_allocp_inst_br
+    = new object_allocator<hsa_insn_br> ("HSA branching instructions");
+  hsa_allocp_inst_sbr
+    = new object_allocator<hsa_insn_sbr> ("HSA switch branching instructions");
+  hsa_allocp_inst_call
+    = new object_allocator<hsa_insn_call> ("HSA call instructions");
+  hsa_allocp_inst_arg_block
+    = new object_allocator<hsa_insn_arg_block> ("HSA arg block instructions");
+  hsa_allocp_inst_comment
+    = new object_allocator<hsa_insn_comment> ("HSA comment instructions");
+  hsa_allocp_inst_queue
+    = new object_allocator<hsa_insn_queue> ("HSA queue instructions");
+  hsa_allocp_inst_srctype
+    = new object_allocator<hsa_insn_srctype> ("HSA source type instructions");
+  hsa_allocp_inst_packed
+    = new object_allocator<hsa_insn_packed> ("HSA packed instructions");
+  hsa_allocp_inst_cvt
+    = new object_allocator<hsa_insn_cvt> ("HSA convert instructions");
+  hsa_allocp_inst_alloca
+    = new object_allocator<hsa_insn_alloca> ("HSA alloca instructions");
+  hsa_allocp_bb = new object_allocator<hsa_bb> ("HSA basic blocks");
+}
+
+/* Deinitialize the HSA subsystem and free all allocated memory.  */
+
+static void
+hsa_deinit_data_for_cfun (void)
+{
+  basic_block bb;
+
+  FOR_ALL_BB_FN (bb, cfun)
+    if (bb->aux)
+      {
+        hsa_bb *hbb = hsa_bb_for_bb (bb);
+        hbb->~hsa_bb ();
+        bb->aux = NULL;
+      }
+
+  for (unsigned int i = 0; i < hsa_operands.length (); i++)
+    hsa_destroy_operand (hsa_operands[i]);
+
+  hsa_operands.release ();
+
+  for (unsigned i = 0; i < hsa_instructions.length (); i++)
+    hsa_destroy_insn (hsa_instructions[i]);
+
+  hsa_instructions.release ();
+
+  if (omp_simple_builtins != NULL)
+    {
+      delete omp_simple_builtins;
+      omp_simple_builtins = NULL;
+    }
+
+  delete hsa_allocp_operand_address;
+  delete hsa_allocp_operand_immed;
+  delete hsa_allocp_operand_reg;
+  delete hsa_allocp_operand_code_list;
+  delete hsa_allocp_operand_operand_list;
+  delete hsa_allocp_inst_basic;
+  delete hsa_allocp_inst_phi;
+  delete hsa_allocp_inst_atomic;
+  delete hsa_allocp_inst_mem;
+  delete hsa_allocp_inst_signal;
+  delete hsa_allocp_inst_seg;
+  delete hsa_allocp_inst_cmp;
+  delete hsa_allocp_inst_br;
+  delete hsa_allocp_inst_sbr;
+  delete hsa_allocp_inst_call;
+  delete hsa_allocp_inst_arg_block;
+  delete hsa_allocp_inst_comment;
+  delete hsa_allocp_inst_queue;
+  delete hsa_allocp_inst_srctype;
+  delete hsa_allocp_inst_packed;
+  delete hsa_allocp_inst_cvt;
+  delete hsa_allocp_inst_alloca;
+  delete hsa_allocp_bb;
+  delete hsa_cfun;
+}
+
+/* Return the type which holds addresses in the given SEGMENT.  */
+
+static BrigType16_t
+hsa_get_segment_addr_type (BrigSegment8_t segment)
+{
+  switch (segment)
+    {
+    case BRIG_SEGMENT_NONE:
+      gcc_unreachable ();
+
+    case BRIG_SEGMENT_FLAT:
+    case BRIG_SEGMENT_GLOBAL:
+    case BRIG_SEGMENT_READONLY:
+    case BRIG_SEGMENT_KERNARG:
+      return hsa_machine_large_p () ? BRIG_TYPE_U64 : BRIG_TYPE_U32;
+
+    case BRIG_SEGMENT_GROUP:
+    case BRIG_SEGMENT_PRIVATE:
+    case BRIG_SEGMENT_SPILL:
+    case BRIG_SEGMENT_ARG:
+      return BRIG_TYPE_U32;
+    }
+  gcc_unreachable ();
+}
+
+/* Return an integer BRIG type according to the provided SIZE in bytes.  If
+   SIGN is set to true, return a signed integer type.  */
+
+static BrigType16_t
+get_integer_type_by_bytes (unsigned size, bool sign)
+{
+  if (sign)
+    switch (size)
+      {
+      case 1:
+        return BRIG_TYPE_S8;
+      case 2:
+        return BRIG_TYPE_S16;
+      case 4:
+        return BRIG_TYPE_S32;
+      case 8:
+        return BRIG_TYPE_S64;
+      default:
+        break;
+      }
+  else
+    switch (size)
+      {
+      case 1:
+        return BRIG_TYPE_U8;
+      case 2:
+        return BRIG_TYPE_U16;
+      case 4:
+        return BRIG_TYPE_U32;
+      case 8:
+        return BRIG_TYPE_U64;
+      default:
+        break;
+      }
+
+  return 0;
+}
+
+/* Return HSA type for tree TYPE, which has to fit into BrigType16_t.
+   Pointers are assumed to use flat addressing.  If MIN32INT is true, always
+   expand integer types to one that has at least 32 bits.
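+   For example, with MIN32INT a 16-bit integer is returned as BRIG_TYPE_S32
+   or BRIG_TYPE_U32, because HSAIL registers and immediate operands cannot
+   be narrower than 32 bits (except for f16).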
+   */
+
+static BrigType16_t
+hsa_type_for_scalar_tree_type (const_tree type, bool min32int)
+{
+  HOST_WIDE_INT bsize;
+  const_tree base;
+  BrigType16_t res = BRIG_TYPE_NONE;
+
+  gcc_checking_assert (TYPE_P (type));
+  gcc_checking_assert (!AGGREGATE_TYPE_P (type));
+  if (POINTER_TYPE_P (type))
+    return hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+
+  if (TREE_CODE (type) == VECTOR_TYPE || TREE_CODE (type) == COMPLEX_TYPE)
+    base = TREE_TYPE (type);
+  else
+    base = type;
+
+  if (!tree_fits_uhwi_p (TYPE_SIZE (base)))
+    {
+      HSA_SORRY_ATV (EXPR_LOCATION (type),
+                     "support for HSA does not implement huge or "
+                     "variable-sized type %T", type);
+      return res;
+    }
+
+  bsize = tree_to_uhwi (TYPE_SIZE (base));
+  unsigned byte_size = bsize / BITS_PER_UNIT;
+  if (INTEGRAL_TYPE_P (base))
+    res = get_integer_type_by_bytes (byte_size, !TYPE_UNSIGNED (base));
+  else if (SCALAR_FLOAT_TYPE_P (base))
+    {
+      switch (bsize)
+        {
+        case 16:
+          res = BRIG_TYPE_F16;
+          break;
+        case 32:
+          res = BRIG_TYPE_F32;
+          break;
+        case 64:
+          res = BRIG_TYPE_F64;
+          break;
+        default:
+          break;
+        }
+    }
+
+  if (res == BRIG_TYPE_NONE)
+    {
+      HSA_SORRY_ATV (EXPR_LOCATION (type),
+                     "support for HSA does not implement type %T", type);
+      return res;
+    }
+
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    {
+      HOST_WIDE_INT tsize = tree_to_uhwi (TYPE_SIZE (type));
+
+      if (bsize == tsize)
+        {
+          HSA_SORRY_ATV (EXPR_LOCATION (type),
+                         "support for HSA does not implement a vector type "
+                         "where a type and unit size are equal: %T", type);
+          return res;
+        }
+
+      switch (tsize)
+        {
+        case 32:
+          res |= BRIG_TYPE_PACK_32;
+          break;
+        case 64:
+          res |= BRIG_TYPE_PACK_64;
+          break;
+        case 128:
+          res |= BRIG_TYPE_PACK_128;
+          break;
+        default:
+          HSA_SORRY_ATV (EXPR_LOCATION (type),
+                         "support for HSA does not implement type %T", type);
+        }
+    }
+
+  if (min32int)
+    {
+      /* Registers/immediate operands can only be 32 bits or more, except
+         for f16.  */
+      if (res == BRIG_TYPE_U8 || res == BRIG_TYPE_U16)
+        res = BRIG_TYPE_U32;
+      else if (res == BRIG_TYPE_S8 || res == BRIG_TYPE_S16)
+        res = BRIG_TYPE_S32;
+    }
+
+  if (TREE_CODE (type) == COMPLEX_TYPE)
+    {
+      unsigned bsize = 2 * hsa_type_bit_size (res);
+      res = hsa_bittype_for_bitsize (bsize);
+    }
+
+  return res;
+}
+
+/* Returns the BRIG type we need to load/store entities of TYPE.  */
+
+static BrigType16_t
+mem_type_for_type (BrigType16_t type)
+{
+  /* HSA has non-intuitive constraints on load/store types.  If it's
+     a bit-type it _must_ be B128, if it's not a bit-type it must be
+     64 bits max.  So for loading entities of 128 bits (e.g. vectors)
+     we have to use B128, while for loading the rest we have to use the
+     input type (??? or maybe also flatten it to an equally sized
+     non-vector unsigned type?).  */
+  if ((type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_128)
+    return BRIG_TYPE_B128;
+  else if (hsa_btype_p (type))
+    {
+      unsigned bitsize = hsa_type_bit_size (type);
+      if (bitsize < 128)
+        return hsa_uint_for_bitsize (bitsize);
+    }
+  return type;
+}
+
+/* Return HSA type for tree TYPE.  If it cannot fit into BrigType16_t, some
+   kind of array will be generated, setting *DIM_P appropriately.  Otherwise,
+   it will be set to zero.
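+   For instance, an 'int[16]' comes back as BRIG_TYPE_S32 | BRIG_TYPE_ARRAY
+   with *DIM_P set to 16, whereas a plain 'int' yields BRIG_TYPE_S32 with
+   *DIM_P set to zero.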
+   */
+
+static BrigType16_t
+hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p = NULL,
+                        bool min32int = false)
+{
+  gcc_checking_assert (TYPE_P (type));
+  if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
+    {
+      HSA_SORRY_ATV (EXPR_LOCATION (type), "support for HSA does not "
+                     "implement huge or variable-sized type %T", type);
+      return BRIG_TYPE_NONE;
+    }
+
+  if (RECORD_OR_UNION_TYPE_P (type))
+    {
+      if (dim_p)
+        *dim_p = tree_to_uhwi (TYPE_SIZE_UNIT (type));
+      return BRIG_TYPE_U8 | BRIG_TYPE_ARRAY;
+    }
+
+  if (TREE_CODE (type) == ARRAY_TYPE)
+    {
+      /* We try to be nice and use the real base-type when this is an array of
+         scalars and only resort to an array of bytes if the type is more
+         complex.  */
+
+      unsigned HOST_WIDE_INT dim = 1;
+
+      while (TREE_CODE (type) == ARRAY_TYPE)
+        {
+          tree domain = TYPE_DOMAIN (type);
+          if (!TYPE_MIN_VALUE (domain)
+              || !TYPE_MAX_VALUE (domain)
+              || !tree_fits_shwi_p (TYPE_MIN_VALUE (domain))
+              || !tree_fits_shwi_p (TYPE_MAX_VALUE (domain)))
+            {
+              HSA_SORRY_ATV (EXPR_LOCATION (type),
+                             "support for HSA does not implement array %T "
+                             "with unknown bounds", type);
+              return BRIG_TYPE_NONE;
+            }
+          HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (domain));
+          HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (domain));
+          dim = dim * (unsigned HOST_WIDE_INT) (max - min + 1);
+          type = TREE_TYPE (type);
+        }
+
+      BrigType16_t res;
+      if (RECORD_OR_UNION_TYPE_P (type))
+        {
+          dim = dim * tree_to_uhwi (TYPE_SIZE_UNIT (type));
+          res = BRIG_TYPE_U8;
+        }
+      else
+        res = hsa_type_for_scalar_tree_type (type, false);
+
+      if (dim_p)
+        *dim_p = dim;
+      return res | BRIG_TYPE_ARRAY;
+    }
+
+  /* Scalar case: */
+  if (dim_p)
+    *dim_p = 0;
+
+  return hsa_type_for_scalar_tree_type (type, min32int);
+}
+
+/* Returns true if converting from STYPE into DTYPE needs the _CVT
+   opcode.  If false, a normal _MOV is enough.  */
+
+static bool
+hsa_needs_cvt (BrigType16_t dtype, BrigType16_t stype)
+{
+  if (hsa_btype_p (dtype))
+    return false;
+
+  /* Float <-> int conversions are real converts.  */
+  if (hsa_type_float_p (dtype) != hsa_type_float_p (stype))
+    return true;
+  /* When the types differ in size, we need CVT as well.  */
+  if (hsa_type_bit_size (dtype) != hsa_type_bit_size (stype))
+    return true;
+  return false;
+}
+
+/* Look up or create the hsa_symbol structure associated with a given
+   VAR_DECL, or look up the hsa_symbol corresponding to a PARM_DECL.  */
+
+static hsa_symbol *
+get_symbol_for_decl (tree decl)
+{
+  hsa_symbol **slot;
+  hsa_symbol dummy (BRIG_TYPE_NONE, BRIG_SEGMENT_NONE, BRIG_LINKAGE_NONE);
+
+  gcc_assert (TREE_CODE (decl) == PARM_DECL
+              || TREE_CODE (decl) == RESULT_DECL
+              || TREE_CODE (decl) == VAR_DECL);
+
+  dummy.m_decl = decl;
+
+  bool is_in_global_vars
+    = TREE_CODE (decl) == VAR_DECL && is_global_var (decl);
+
+  if (is_in_global_vars)
+    slot = hsa_global_variable_symbols->find_slot (&dummy, INSERT);
+  else
+    slot = hsa_cfun->m_local_symbols->find_slot (&dummy, INSERT);
+
+  gcc_checking_assert (slot);
+  if (*slot)
+    {
+      /* If the symbol is problematic, mark the current function as
+         problematic, too.  */
+      if ((*slot)->m_seen_error)
+        hsa_fail_cfun ();
+
+      return *slot;
+    }
+  else
+    {
+      hsa_symbol *sym;
+      gcc_assert (TREE_CODE (decl) == VAR_DECL);
+
+      if (is_in_global_vars)
+        {
+          sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_GLOBAL,
+                                BRIG_LINKAGE_PROGRAM, true,
+                                BRIG_ALLOCATION_PROGRAM);
+          hsa_cfun->m_global_symbols.safe_push (sym);
+        }
+      else
+        {
+          /* PARM_DECLs and RESULT_DECLs should already be in m_local_symbols.
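+             A cache miss at this point can therefore only legitimately
+             happen for a local VAR_DECL, which the assert below checks.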
+             */
+          gcc_assert (TREE_CODE (decl) == VAR_DECL);
+
+          sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_PRIVATE,
+                                BRIG_LINKAGE_FUNCTION);
+          hsa_cfun->m_private_variables.safe_push (sym);
+        }
+
+      sym->fillup_for_decl (decl);
+      sym->m_name = hsa_get_declaration_name (decl);
+
+      *slot = sym;
+      return sym;
+    }
+}
+
+/* For a given HSA function declaration, return a host
+   function declaration.  */
+
+tree
+hsa_get_host_function (tree decl)
+{
+  hsa_function_summary *s
+    = hsa_summaries->get (cgraph_node::get_create (decl));
+  gcc_assert (s->m_kind != HSA_NONE);
+  gcc_assert (s->m_gpu_implementation_p);
+
+  return s->m_binded_function->decl;
+}
+
+/* Return the BRIG name of function DECL, using the name of its host
+   equivalent when one exists.  */
+
+static char *
+get_brig_function_name (tree decl)
+{
+  tree d = decl;
+
+  hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d));
+  if (s->m_kind != HSA_NONE && s->m_gpu_implementation_p)
+    d = s->m_binded_function->decl;
+
+  /* IPA split can create a function that has no host equivalent.  */
+  if (d == NULL)
+    d = decl;
+
+  char *name = xstrdup (hsa_get_declaration_name (d));
+  hsa_sanitize_name (name);
+
+  return name;
+}
+
+/* Create a spill symbol of type TYPE.  */
+
+hsa_symbol *
+hsa_get_spill_symbol (BrigType16_t type)
+{
+  hsa_symbol *sym = new hsa_symbol (type, BRIG_SEGMENT_SPILL,
+                                    BRIG_LINKAGE_FUNCTION);
+  hsa_cfun->m_spill_symbols.safe_push (sym);
+  return sym;
+}
+
+/* Create a symbol for a read-only string constant.  */
+
+hsa_symbol *
+hsa_get_string_cst_symbol (tree string_cst)
+{
+  gcc_checking_assert (TREE_CODE (string_cst) == STRING_CST);
+
+  hsa_symbol **slot = hsa_cfun->m_string_constants_map.get (string_cst);
+  if (slot)
+    return *slot;
+
+  hsa_op_immed *cst = new hsa_op_immed (string_cst);
+  hsa_symbol *sym = new hsa_symbol (cst->m_type, BRIG_SEGMENT_GLOBAL,
+                                    BRIG_LINKAGE_MODULE, true,
+                                    BRIG_ALLOCATION_AGENT);
+  sym->m_cst_value = cst;
+  sym->m_dim = TREE_STRING_LENGTH (string_cst);
+  sym->m_name_number = hsa_cfun->m_global_symbols.length ();
+
+  hsa_cfun->m_global_symbols.safe_push (sym);
+  hsa_cfun->m_string_constants_map.put (string_cst, sym);
+  return sym;
+}
+
+/* Constructor of the ancestor of all operands.  K is the BRIG kind that
+   identifies what the operand is.  */
+
+hsa_op_base::hsa_op_base (BrigKind16_t k)
+  : m_next (NULL), m_brig_op_offset (0), m_kind (k)
+{
+  hsa_operands.safe_push (this);
+}
+
+/* Constructor of the ancestor of all operands which have a type.  K is the
+   BRIG kind that identifies what the operand is.  T is the type of the
+   operand.  */
+
+hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t)
+  : hsa_op_base (k), m_type (t)
+{
+}
+
+hsa_op_with_type *
+hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb)
+{
+  if (m_type == dtype)
+    return this;
+
+  hsa_op_reg *dest;
+
+  if (hsa_needs_cvt (dtype, m_type))
+    {
+      dest = new hsa_op_reg (dtype);
+      hbb->append_insn (new hsa_insn_cvt (dest, this));
+    }
+  else
+    {
+      dest = new hsa_op_reg (m_type);
+      hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+                                            dest->m_type, dest, this));
+
+      /* We cannot simply emit, for instance, 'mov_u32 $_3, 48 (s32)',
+         because the type of the operand must be the same as the type of the
+         instruction.  */
+      dest->m_type = dtype;
+    }
+
+  return dest;
+}
+
+/* Constructor of the class representing HSA immediate values.  TREE_VAL is
+   the tree representation of the immediate value.  If MIN32INT is true,
+   always expand integer types to one that has at least 32 bits.
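+   The constructor also computes the size of the BRIG byte representation of
+   the value (m_brig_repr_size) and fills the representation in at the end
+   via emit_to_buffer.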
+   */
+
+hsa_op_immed::hsa_op_immed (tree tree_val, bool min32int)
+  : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES,
+                      hsa_type_for_tree_type (TREE_TYPE (tree_val), NULL,
+                                              min32int)),
+    m_brig_repr (NULL)
+{
+  if (hsa_seen_error ())
+    return;
+
+  gcc_checking_assert ((is_gimple_min_invariant (tree_val)
+                        && (!POINTER_TYPE_P (TREE_TYPE (tree_val))
+                            || TREE_CODE (tree_val) == INTEGER_CST))
+                       || TREE_CODE (tree_val) == CONSTRUCTOR);
+  m_tree_value = tree_val;
+  m_brig_repr_size = hsa_get_imm_brig_type_len (m_type);
+
+  if (TREE_CODE (m_tree_value) == STRING_CST)
+    m_brig_repr_size = TREE_STRING_LENGTH (m_tree_value);
+  else if (TREE_CODE (m_tree_value) == CONSTRUCTOR)
+    {
+      m_brig_repr_size
+        = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value)));
+
+      /* Verify that all elements of a constructor are constants.  */
+      for (unsigned i = 0;
+           i < vec_safe_length (CONSTRUCTOR_ELTS (m_tree_value)); i++)
+        {
+          tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value;
+          if (!CONSTANT_CLASS_P (v))
+            {
+              HSA_SORRY_AT (EXPR_LOCATION (tree_val),
+                            "HSA ctor should have only constants");
+              return;
+            }
+        }
+    }
+
+  emit_to_buffer (m_tree_value);
+}
+
+/* Constructor of the class representing HSA immediate values.  INTEGER_VALUE
+   is the integer representation of the immediate value and TYPE is its BRIG
+   type.  */
+
+hsa_op_immed::hsa_op_immed (HOST_WIDE_INT integer_value, BrigType16_t type)
+  : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES, type),
+    m_tree_value (NULL), m_brig_repr (NULL)
+{
+  gcc_assert (hsa_type_integer_p (type));
+  m_int_value = integer_value;
+  m_brig_repr_size = hsa_type_bit_size (type) / BITS_PER_UNIT;
+
+  hsa_bytes bytes;
+
+  switch (m_brig_repr_size)
+    {
+    case 1:
+      bytes.b8 = (uint8_t) m_int_value;
+      break;
+    case 2:
+      bytes.b16 = (uint16_t) m_int_value;
+      break;
+    case 4:
+      bytes.b32 = (uint32_t) m_int_value;
+      break;
+    case 8:
+      bytes.b64 = (uint64_t) m_int_value;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  m_brig_repr = XNEWVEC (char, m_brig_repr_size);
+  memcpy (m_brig_repr, &bytes, m_brig_repr_size);
+}
+
+hsa_op_immed::hsa_op_immed ()
+  : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE), m_brig_repr (NULL)
+{
+}
+
+/* New operator to allocate immediate operands from pool alloc.  */
+
+void *
+hsa_op_immed::operator new (size_t)
+{
+  return hsa_allocp_operand_immed->allocate_raw ();
+}
+
+/* Destructor.  */
+
+hsa_op_immed::~hsa_op_immed ()
+{
+  free (m_brig_repr);
+}
+
+/* Change the type of the immediate value to T.  */
+
+void
+hsa_op_immed::set_type (BrigType16_t t)
+{
+  m_type = t;
+}
+
+/* Constructor of the class representing HSA registers and pseudo-registers.
+   T is the BRIG type of the new register.  */
+
+hsa_op_reg::hsa_op_reg (BrigType16_t t)
+  : hsa_op_with_type (BRIG_KIND_OPERAND_REGISTER, t), m_gimple_ssa (NULL_TREE),
+    m_def_insn (NULL), m_spill_sym (NULL), m_order (hsa_cfun->m_reg_count++),
+    m_lr_begin (0), m_lr_end (0), m_reg_class (0), m_hard_num (0)
+{
+}
+
+/* New operator to allocate a register from pool alloc.  */
+
+void *
+hsa_op_reg::operator new (size_t)
+{
+  return hsa_allocp_operand_reg->allocate_raw ();
+}
+
+/* Verify register operand.  */
+
+void
+hsa_op_reg::verify_ssa ()
+{
+  /* Verify that each HSA register has a definition assigned.  The exceptions
+     are default definitions of SSA names that do not correspond to a
+     PARM_DECL.  */
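+  /* (A default definition has no defining statement, so there is no
+     defining HSA instruction to record for it either.)  */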
*/ + gcc_checking_assert (m_def_insn + || (m_gimple_ssa != NULL + && (!SSA_NAME_VAR (m_gimple_ssa) + || (TREE_CODE (SSA_NAME_VAR (m_gimple_ssa)) + != PARM_DECL)) + && SSA_NAME_IS_DEFAULT_DEF (m_gimple_ssa))); + + /* Verify that every use of the register is really present + in an instruction. */ + for (unsigned i = 0; i < m_uses.length (); i++) + { + hsa_insn_basic *use = m_uses[i]; + + bool is_visited = false; + for (unsigned j = 0; j < use->operand_count (); j++) + { + hsa_op_base *u = use->get_op (j); + hsa_op_address *addr; addr = dyn_cast <hsa_op_address *> (u); + if (addr && addr->m_reg) + u = addr->m_reg; + + if (u == this) + { + bool r = !addr && use->op_output_p (j); + + if (r) + { + error ("HSA SSA name defined by instruction that is supposed " + "to be using it"); + debug_hsa_operand (this); + debug_hsa_insn (use); + internal_error ("HSA SSA verification failed"); + } + + is_visited = true; + } + } + + if (!is_visited) + { + error ("HSA SSA name not among operands of instruction that is " + "supposed to use it"); + debug_hsa_operand (this); + debug_hsa_insn (use); + internal_error ("HSA SSA verification failed"); + } + } +} + +hsa_op_address::hsa_op_address (hsa_symbol *sym, hsa_op_reg *r, + HOST_WIDE_INT offset) + : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (r), + m_imm_offset (offset) +{ +} + +hsa_op_address::hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset) + : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (NULL), + m_imm_offset (offset) +{ +} + +hsa_op_address::hsa_op_address (hsa_op_reg *r, HOST_WIDE_INT offset) + : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (NULL), m_reg (r), + m_imm_offset (offset) +{ +} + +/* New operator to allocate address operands from pool alloc. */ + +void * +hsa_op_address::operator new (size_t) +{ + return hsa_allocp_operand_address->allocate_raw (); +} + +/* Constructor of an operand referring to HSAIL code. */ + +hsa_op_code_ref::hsa_op_code_ref () : hsa_op_base (BRIG_KIND_OPERAND_CODE_REF), + m_directive_offset (0) +{ +} + +/* Constructor of an operand representing a code list. Set it up so that it + can contain ELEMENTS number of elements. */ + +hsa_op_code_list::hsa_op_code_list (unsigned elements) + : hsa_op_base (BRIG_KIND_OPERAND_CODE_LIST) +{ + m_offsets.create (1); + m_offsets.safe_grow_cleared (elements); +} + +/* New operator to allocate code list operands from pool alloc. */ + +void * +hsa_op_code_list::operator new (size_t) +{ + return hsa_allocp_operand_code_list->allocate_raw (); +} + +/* Constructor of an operand representing an operand list. + Set it up so that it can contain ELEMENTS number of elements. */ + +hsa_op_operand_list::hsa_op_operand_list (unsigned elements) + : hsa_op_base (BRIG_KIND_OPERAND_OPERAND_LIST) +{ + m_offsets.create (elements); + m_offsets.safe_grow (elements); +} + +/* New operator to allocate operand list operands from pool alloc. 
*/
+
+void *
+hsa_op_operand_list::operator new (size_t)
+{
+  return hsa_allocp_operand_operand_list->allocate_raw ();
+}
+
+hsa_op_operand_list::~hsa_op_operand_list ()
+{
+  m_offsets.release ();
+}
+
+
+hsa_op_reg *
+hsa_function_representation::reg_for_gimple_ssa (tree ssa)
+{
+  hsa_op_reg *hreg;
+
+  gcc_checking_assert (TREE_CODE (ssa) == SSA_NAME);
+  if (m_ssa_map[SSA_NAME_VERSION (ssa)])
+    return m_ssa_map[SSA_NAME_VERSION (ssa)];
+
+  hreg = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (ssa),
+                                                        true));
+  hreg->m_gimple_ssa = ssa;
+  m_ssa_map[SSA_NAME_VERSION (ssa)] = hreg;
+
+  return hreg;
+}
+
+void
+hsa_op_reg::set_definition (hsa_insn_basic *insn)
+{
+  if (hsa_cfun->m_in_ssa)
+    {
+      gcc_checking_assert (!m_def_insn);
+      m_def_insn = insn;
+    }
+  else
+    m_def_insn = NULL;
+}
+
+/* Constructor of the class which is the base of all instructions and directly
+   represents the most basic ones.  NOPS is the number of operands that the
+   operand vector will contain (and which will be cleared).  OPC is the opcode
+   of the instruction.  This constructor does not set the type.  */
+
+hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc)
+  : m_prev (NULL),
+    m_next (NULL), m_bb (NULL), m_opcode (opc), m_number (0),
+    m_type (BRIG_TYPE_NONE), m_brig_offset (0)
+{
+  if (nops > 0)
+    m_operands.safe_grow_cleared (nops);
+
+  hsa_instructions.safe_push (this);
+}
+
+/* Make OP the operand number INDEX of the operands of this instruction.  If
+   OP is a register or an address containing a register, then either set the
+   definition of the register to this instruction if it is an output operand,
+   or add this instruction to the register's uses if it is an input one.  */
+
+void
+hsa_insn_basic::set_op (int index, hsa_op_base *op)
+{
+  /* Each address operand is always a use.  */
+  hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
+  if (addr && addr->m_reg)
+    addr->m_reg->m_uses.safe_push (this);
+  else
+    {
+      hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
+      if (reg)
+        {
+          if (op_output_p (index))
+            reg->set_definition (this);
+          else
+            reg->m_uses.safe_push (this);
+        }
+    }
+
+  m_operands[index] = op;
+}
+
+/* Get the INDEX-th operand of the instruction.  */
+
+hsa_op_base *
+hsa_insn_basic::get_op (int index)
+{
+  return m_operands[index];
+}
+
+/* Get the address of the INDEX-th operand of the instruction.  */
+
+hsa_op_base **
+hsa_insn_basic::get_op_addr (int index)
+{
+  return &m_operands[index];
+}
+
+/* Get the number of operands of the instruction.  */
+
+unsigned int
+hsa_insn_basic::operand_count ()
+{
+  return m_operands.length ();
+}
+
+/* Constructor of the class which is the base of all instructions and directly
+   represents the most basic ones.  NOPS is the number of operands that the
+   operand vector will contain (and which will be cleared).  OPC is the opcode
+   of the instruction, T is the type of the instruction.
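+   Up to four operands may be passed directly as ARG0 through ARG3; trailing
+   NULL arguments are simply left unset.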
+   */
+
+hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc, BrigType16_t t,
+                                hsa_op_base *arg0, hsa_op_base *arg1,
+                                hsa_op_base *arg2, hsa_op_base *arg3)
+  : m_prev (NULL), m_next (NULL), m_bb (NULL), m_opcode (opc), m_number (0),
+    m_type (t), m_brig_offset (0)
+{
+  if (nops > 0)
+    m_operands.safe_grow_cleared (nops);
+
+  if (arg0 != NULL)
+    {
+      gcc_checking_assert (nops >= 1);
+      set_op (0, arg0);
+    }
+
+  if (arg1 != NULL)
+    {
+      gcc_checking_assert (nops >= 2);
+      set_op (1, arg1);
+    }
+
+  if (arg2 != NULL)
+    {
+      gcc_checking_assert (nops >= 3);
+      set_op (2, arg2);
+    }
+
+  if (arg3 != NULL)
+    {
+      gcc_checking_assert (nops >= 4);
+      set_op (3, arg3);
+    }
+
+  hsa_instructions.safe_push (this);
+}
+
+/* New operator to allocate a basic instruction from pool alloc.  */
+
+void *
+hsa_insn_basic::operator new (size_t)
+{
+  return hsa_allocp_inst_basic->allocate_raw ();
+}
+
+/* Verify the instruction.  */
+
+void
+hsa_insn_basic::verify ()
+{
+  hsa_op_address *addr;
+  hsa_op_reg *reg;
+
+  /* Iterate over all register operands and verify that the instruction
+     is recorded in the uses of each register.  */
+  for (unsigned i = 0; i < operand_count (); i++)
+    {
+      hsa_op_base *use = get_op (i);
+
+      if ((addr = dyn_cast <hsa_op_address *> (use)) && addr->m_reg)
+        {
+          gcc_assert (addr->m_reg->m_def_insn != this);
+          use = addr->m_reg;
+        }
+
+      if ((reg = dyn_cast <hsa_op_reg *> (use)) && !op_output_p (i))
+        {
+          unsigned j;
+          for (j = 0; j < reg->m_uses.length (); j++)
+            {
+              if (reg->m_uses[j] == this)
+                break;
+            }
+
+          if (j == reg->m_uses.length ())
+            {
+              error ("HSA instruction uses a register but is not among "
+                     "recorded register uses");
+              debug_hsa_operand (reg);
+              debug_hsa_insn (this);
+              internal_error ("HSA instruction verification failed");
+            }
+        }
+    }
+}
+
+/* Constructor of an instruction representing a PHI node.  NOPS is the number
+   of operands (equal to the number of predecessors).  */
+
+hsa_insn_phi::hsa_insn_phi (unsigned nops, hsa_op_reg *dst)
+  : hsa_insn_basic (nops, HSA_OPCODE_PHI), m_dest (dst)
+{
+  dst->set_definition (this);
+}
+
+/* New operator to allocate a PHI instruction from pool alloc.  */
+
+void *
+hsa_insn_phi::operator new (size_t)
+{
+  return hsa_allocp_inst_phi->allocate_raw ();
+}
+
+/* Constructor of the class representing an instruction for a conditional
+   jump.  CTRL is the control register which determines whether the jump is
+   carried out; the new instruction is automatically added to its list of
+   uses.  */
+
+hsa_insn_br::hsa_insn_br (hsa_op_reg *ctrl)
+  : hsa_insn_basic (1, BRIG_OPCODE_CBR, BRIG_TYPE_B1, ctrl),
+    m_width (BRIG_WIDTH_1)
+{
+}
+
+/* New operator to allocate a branch instruction from pool alloc.  */
+
+void *
+hsa_insn_br::operator new (size_t)
+{
+  return hsa_allocp_inst_br->allocate_raw ();
+}
+
+/* Constructor of the class representing an instruction for a switch jump.
+   INDEX is the index register and JUMP_COUNT is the number of jump
+   targets.  */
+
+hsa_insn_sbr::hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count)
+  : hsa_insn_basic (1, BRIG_OPCODE_SBR, BRIG_TYPE_B1, index),
+    m_width (BRIG_WIDTH_1), m_jump_table (vNULL), m_default_bb (NULL),
+    m_label_code_list (new hsa_op_code_list (jump_count))
+{
+}
+
+/* New operator to allocate a switch branch instruction from pool alloc.  */
+
+void *
+hsa_insn_sbr::operator new (size_t)
+{
+  return hsa_allocp_inst_sbr->allocate_raw ();
+}
+
+/* Replace all occurrences of OLD_BB with NEW_BB in the instruction's
+   jump table.
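+   (This is needed when basic blocks are remapped during CFG manipulation so
+   that the switch keeps branching to the right places.)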
+   */
+
+void
+hsa_insn_sbr::replace_all_labels (basic_block old_bb, basic_block new_bb)
+{
+  for (unsigned i = 0; i < m_jump_table.length (); i++)
+    if (m_jump_table[i] == old_bb)
+      m_jump_table[i] = new_bb;
+}
+
+hsa_insn_sbr::~hsa_insn_sbr ()
+{
+  m_jump_table.release ();
+}
+
+/* Constructor of a comparison instruction.  CMP is the comparison operation
+   and T is the result type.  */
+
+hsa_insn_cmp::hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t,
+                            hsa_op_base *arg0, hsa_op_base *arg1,
+                            hsa_op_base *arg2)
+  : hsa_insn_basic (3, BRIG_OPCODE_CMP, t, arg0, arg1, arg2), m_compare (cmp)
+{
+}
+
+/* New operator to allocate a compare instruction from pool alloc.  */
+
+void *
+hsa_insn_cmp::operator new (size_t)
+{
+  return hsa_allocp_inst_cmp->allocate_raw ();
+}
+
+/* Constructor of the classes representing memory accesses.  OPC is the opcode
+   (must be BRIG_OPCODE_ST or BRIG_OPCODE_LD) and T is the type.  The
+   instruction operands are provided as ARG0 and ARG1.  */
+
+hsa_insn_mem::hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0,
+                            hsa_op_base *arg1)
+  : hsa_insn_basic (2, opc, t, arg0, arg1),
+    m_align (hsa_natural_alignment (t)), m_equiv_class (0)
+{
+  gcc_checking_assert (opc == BRIG_OPCODE_LD || opc == BRIG_OPCODE_ST);
+}
+
+/* Constructor for descendants allowing different opcodes and numbers of
+   operands; it passes its arguments directly to the hsa_insn_basic
+   constructor.  The instruction operands are provided as ARG[0-3].  */
+
+hsa_insn_mem::hsa_insn_mem (unsigned nops, int opc, BrigType16_t t,
+                            hsa_op_base *arg0, hsa_op_base *arg1,
+                            hsa_op_base *arg2, hsa_op_base *arg3)
+  : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3),
+    m_align (hsa_natural_alignment (t)), m_equiv_class (0)
+{
+}
+
+/* New operator to allocate a memory instruction from pool alloc.  */
+
+void *
+hsa_insn_mem::operator new (size_t)
+{
+  return hsa_allocp_inst_mem->allocate_raw ();
+}
+
+/* Constructor of the class representing atomic instructions and signals.  OPC
+   is the principal opcode, AOP is the specific atomic operation opcode.  T is
+   the type of the instruction.  The instruction operands are provided as
+   ARG[0-3].  */
+
+hsa_insn_atomic::hsa_insn_atomic (int nops, int opc,
+                                  enum BrigAtomicOperation aop,
+                                  BrigType16_t t, BrigMemoryOrder memorder,
+                                  hsa_op_base *arg0,
+                                  hsa_op_base *arg1, hsa_op_base *arg2,
+                                  hsa_op_base *arg3)
+  : hsa_insn_mem (nops, opc, t, arg0, arg1, arg2, arg3), m_atomicop (aop),
+    m_memoryorder (memorder),
+    m_memoryscope (BRIG_MEMORY_SCOPE_SYSTEM)
+{
+  gcc_checking_assert (opc == BRIG_OPCODE_ATOMICNORET
+                       || opc == BRIG_OPCODE_ATOMIC
+                       || opc == BRIG_OPCODE_SIGNAL
+                       || opc == BRIG_OPCODE_SIGNALNORET);
+}
+
+/* New operator to allocate an atomic instruction from pool alloc.  */
+
+void *
+hsa_insn_atomic::operator new (size_t)
+{
+  return hsa_allocp_inst_atomic->allocate_raw ();
+}
+
+/* Constructor of the class representing signal instructions.  OPC is the
+   principal opcode, SOP is the specific signal operation opcode.  T is the
+   type of the instruction.  The instruction operands are provided as
+   ARG[0-3].  */
+
+hsa_insn_signal::hsa_insn_signal (int nops, int opc,
+                                  enum BrigAtomicOperation sop,
+                                  BrigType16_t t, hsa_op_base *arg0,
+                                  hsa_op_base *arg1, hsa_op_base *arg2,
+                                  hsa_op_base *arg3)
+  : hsa_insn_atomic (nops, opc, sop, t, BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE,
+                     arg0, arg1, arg2, arg3)
+{
+}
+
+/* New operator to allocate a signal instruction from pool alloc.
*/ + +void * +hsa_insn_signal::operator new (size_t) +{ + return hsa_allocp_inst_signal->allocate_raw (); +} + +/* Constructor of class representing segment conversion instructions. OPC is + the opcode which must be either BRIG_OPCODE_STOF or BRIG_OPCODE_FTOS. DEST + and SRCT are destination and source types respectively, SEG is the segment + we are converting to or from. The instruction operands are + provided as ARG0 and ARG1. */ + +hsa_insn_seg::hsa_insn_seg (int opc, BrigType16_t dest, BrigType16_t srct, + BrigSegment8_t seg, hsa_op_base *arg0, + hsa_op_base *arg1) + : hsa_insn_basic (2, opc, dest, arg0, arg1), m_src_type (srct), + m_segment (seg) +{ + gcc_checking_assert (opc == BRIG_OPCODE_STOF || opc == BRIG_OPCODE_FTOS); +} + +/* New operator to allocate address conversion instruction from pool alloc. */ + +void * +hsa_insn_seg::operator new (size_t) +{ + return hsa_allocp_inst_seg->allocate_raw (); +} + +/* Constructor of class representing a call instruction. CALLEE is the tree + representation of the function being called. */ + +hsa_insn_call::hsa_insn_call (tree callee) + : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (callee), + m_output_arg (NULL), m_args_code_list (NULL), m_result_code_list (NULL) +{ +} + +hsa_insn_call::hsa_insn_call (hsa_internal_fn *fn) + : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (NULL), + m_called_internal_fn (fn), m_output_arg (NULL), m_args_code_list (NULL), + m_result_code_list (NULL) +{ +} + +/* New operator to allocate call instruction from pool alloc. */ + +void * +hsa_insn_call::operator new (size_t) +{ + return hsa_allocp_inst_call->allocate_raw (); +} + +hsa_insn_call::~hsa_insn_call () +{ + for (unsigned i = 0; i < m_input_args.length (); i++) + delete m_input_args[i]; + + delete m_output_arg; + + m_input_args.release (); + m_input_arg_insns.release (); +} + +/* Constructor of class representing the argument block required to invoke + a call in HSAIL. */ +hsa_insn_arg_block::hsa_insn_arg_block (BrigKind brig_kind, + hsa_insn_call * call) + : hsa_insn_basic (0, HSA_OPCODE_ARG_BLOCK), m_kind (brig_kind), + m_call_insn (call) +{ +} + +/* New operator to allocate argument block instruction from pool alloc. */ + +void * +hsa_insn_arg_block::operator new (size_t) +{ + return hsa_allocp_inst_arg_block->allocate_raw (); +} + +hsa_insn_comment::hsa_insn_comment (const char *s) + : hsa_insn_basic (0, BRIG_KIND_DIRECTIVE_COMMENT) +{ + unsigned l = strlen (s); + + /* Append '// ' to the string. */ + char *buf = XNEWVEC (char, l + 4); + sprintf (buf, "// %s", s); + m_comment = buf; +} + +/* New operator to allocate comment instruction from pool alloc. */ + +void * +hsa_insn_comment::operator new (size_t) +{ + return hsa_allocp_inst_comment->allocate_raw (); +} + +hsa_insn_comment::~hsa_insn_comment () +{ + gcc_checking_assert (m_comment); + free (m_comment); + m_comment = NULL; +} + +/* Constructor of class representing the queue instruction in HSAIL. */ +hsa_insn_queue::hsa_insn_queue (int nops, BrigOpcode opcode) + : hsa_insn_basic (nops, opcode, BRIG_TYPE_U64) +{ +} + +/* New operator to allocate source type instruction from pool alloc. */ + +void * +hsa_insn_srctype::operator new (size_t) +{ + return hsa_allocp_inst_srctype->allocate_raw (); +} + +/* Constructor of class representing the source type instruction in HSAIL. 
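+   In addition to the destination type, these instructions carry a separate
+   type for their source operands in m_source_type; the packed instructions
+   below build on this.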
*/ + +hsa_insn_srctype::hsa_insn_srctype (int nops, BrigOpcode opcode, + BrigType16_t destt, BrigType16_t srct, + hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2 = NULL) + : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2), + m_source_type (srct) +{} + +/* New operator to allocate packed instruction from pool alloc. */ + +void * +hsa_insn_packed::operator new (size_t) +{ + return hsa_allocp_inst_packed->allocate_raw (); +} + +/* Constructor of class representing the packed instruction in HSAIL. */ + +hsa_insn_packed::hsa_insn_packed (int nops, BrigOpcode opcode, + BrigType16_t destt, BrigType16_t srct, + hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2) + : hsa_insn_srctype (nops, opcode, destt, srct, arg0, arg1, arg2) +{ + m_operand_list = new hsa_op_operand_list (nops - 1); +} + +/* New operator to allocate convert instruction from pool alloc. */ + +void * +hsa_insn_cvt::operator new (size_t) +{ + return hsa_allocp_inst_cvt->allocate_raw (); +} + +/* Constructor of class representing the convert instruction in HSAIL. */ + +hsa_insn_cvt::hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src) + : hsa_insn_basic (2, BRIG_OPCODE_CVT, dest->m_type, dest, src) +{ +} + +/* New operator to allocate alloca from pool alloc. */ + +void * +hsa_insn_alloca::operator new (size_t) +{ + return hsa_allocp_inst_alloca->allocate_raw (); +} + +/* Constructor of class representing the alloca in HSAIL. */ + +hsa_insn_alloca::hsa_insn_alloca (hsa_op_with_type *dest, + hsa_op_with_type *size, unsigned alignment) + : hsa_insn_basic (2, BRIG_OPCODE_ALLOCA, dest->m_type, dest, size), + m_align (BRIG_ALIGNMENT_8) +{ + gcc_assert (dest->m_type == BRIG_TYPE_U32); + if (alignment) + m_align = hsa_alignment_encoding (alignment); +} + +/* Append an instruction INSN into the basic block. */ + +void +hsa_bb::append_insn (hsa_insn_basic *insn) +{ + gcc_assert (insn->m_opcode != 0 || insn->operand_count () == 0); + gcc_assert (!insn->m_bb); + + insn->m_bb = m_bb; + insn->m_prev = m_last_insn; + insn->m_next = NULL; + if (m_last_insn) + m_last_insn->m_next = insn; + m_last_insn = insn; + if (!m_first_insn) + m_first_insn = insn; +} + +/* Insert HSA instruction NEW_INSN immediately before an existing instruction + OLD_INSN. */ + +static void +hsa_insert_insn_before (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn) +{ + hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb); + + if (hbb->m_first_insn == old_insn) + hbb->m_first_insn = new_insn; + new_insn->m_prev = old_insn->m_prev; + new_insn->m_next = old_insn; + if (old_insn->m_prev) + old_insn->m_prev->m_next = new_insn; + old_insn->m_prev = new_insn; +} + +/* Append HSA instruction NEW_INSN immediately after an existing instruction + OLD_INSN. */ + +static void +hsa_append_insn_after (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn) +{ + hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb); + + if (hbb->m_last_insn == old_insn) + hbb->m_last_insn = new_insn; + new_insn->m_prev = old_insn; + new_insn->m_next = old_insn->m_next; + if (old_insn->m_next) + old_insn->m_next->m_prev = new_insn; + old_insn->m_next = new_insn; +} + +/* Return a register containing the calculated value of EXP which must be an + expression consisting of PLUS_EXPRs, MULT_EXPRs, NOP_EXPRs, SSA_NAMEs and + integer constants as returned by get_inner_reference. + Newly generated HSA instructions will be appended to HBB. + Perform all calculations in ADDRTYPE. 
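+   For example, an address expression like (i * 8) + 16 is recursively broken
+   into a mul and an add instruction, and the register holding the final sum
+   is returned.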
*/
+
+static hsa_op_with_type *
+gen_address_calculation (tree exp, hsa_bb *hbb, BrigType16_t addrtype)
+{
+  int opcode;
+
+  if (TREE_CODE (exp) == NOP_EXPR)
+    exp = TREE_OPERAND (exp, 0);
+
+  switch (TREE_CODE (exp))
+    {
+    case SSA_NAME:
+      return hsa_cfun->reg_for_gimple_ssa (exp)->get_in_type (addrtype, hbb);
+
+    case INTEGER_CST:
+      {
+        hsa_op_immed *imm = new hsa_op_immed (exp);
+        if (addrtype != imm->m_type)
+          imm->m_type = addrtype;
+        return imm;
+      }
+
+    case PLUS_EXPR:
+      opcode = BRIG_OPCODE_ADD;
+      break;
+
+    case MULT_EXPR:
+      opcode = BRIG_OPCODE_MUL;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  hsa_op_reg *res = new hsa_op_reg (addrtype);
+  hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, addrtype);
+  insn->set_op (0, res);
+
+  hsa_op_with_type *op1 = gen_address_calculation (TREE_OPERAND (exp, 0), hbb,
+                                                   addrtype);
+  hsa_op_with_type *op2 = gen_address_calculation (TREE_OPERAND (exp, 1), hbb,
+                                                   addrtype);
+  insn->set_op (1, op1);
+  insn->set_op (2, op2);
+
+  hbb->append_insn (insn);
+  return res;
+}
+
+/* If R1 is NULL, just return R2; otherwise append an instruction adding them
+   to HBB and return the register holding the result.  */
+
+static hsa_op_reg *
+add_addr_regs_if_needed (hsa_op_reg *r1, hsa_op_reg *r2, hsa_bb *hbb)
+{
+  gcc_checking_assert (r2);
+  if (!r1)
+    return r2;
+
+  hsa_op_reg *res = new hsa_op_reg (r1->m_type);
+  gcc_assert (!hsa_needs_cvt (r1->m_type, r2->m_type));
+  hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_ADD, res->m_type);
+  insn->set_op (0, res);
+  insn->set_op (1, r1);
+  insn->set_op (2, r2);
+  hbb->append_insn (insn);
+  return res;
+}
+
+/* Helper of gen_hsa_addr.  Update *SYMBOL, *ADDRTYPE, *REG and *OFFSET to
+   reflect BASE, which is the first operand of a MEM_REF or a
+   TARGET_MEM_REF.  */
+
+static void
+process_mem_base (tree base, hsa_symbol **symbol, BrigType16_t *addrtype,
+                  hsa_op_reg **reg, offset_int *offset, hsa_bb *hbb)
+{
+  if (TREE_CODE (base) == SSA_NAME)
+    {
+      gcc_assert (!*reg);
+      hsa_op_with_type *ssa
+        = hsa_cfun->reg_for_gimple_ssa (base)->get_in_type (*addrtype, hbb);
+      *reg = dyn_cast <hsa_op_reg *> (ssa);
+    }
+  else if (TREE_CODE (base) == ADDR_EXPR)
+    {
+      tree decl = TREE_OPERAND (base, 0);
+
+      if (!DECL_P (decl) || TREE_CODE (decl) == FUNCTION_DECL)
+        {
+          HSA_SORRY_AT (EXPR_LOCATION (base),
+                        "support for HSA does not implement a memory "
+                        "reference to a non-declaration type");
+          return;
+        }
+
+      gcc_assert (!*symbol);
+
+      *symbol = get_symbol_for_decl (decl);
+      *addrtype = hsa_get_segment_addr_type ((*symbol)->m_segment);
+    }
+  else if (TREE_CODE (base) == INTEGER_CST)
+    *offset += wi::to_offset (base);
+  else
+    gcc_unreachable ();
+}
+
+/* Forward declaration of a function.  */
+
+static void
+gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb);
+
+/* Generate an HSA address operand for a given tree memory reference REF.  If
+   instructions need to be created to calculate the address, they will be
+   added to the end of HBB.  If a caller provides OUTPUT_BITSIZE and
+   OUTPUT_BITPOS, the function assumes that the caller will handle possible
+   bit-field references.  Otherwise, if we reference a bit-field, a sorry
+   message is displayed.
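+   The returned operand encodes the address in the general BRIG form
+   [symbol + register + immediate offset].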
*/ + +static hsa_op_address * +gen_hsa_addr (tree ref, hsa_bb *hbb, HOST_WIDE_INT *output_bitsize = NULL, + HOST_WIDE_INT *output_bitpos = NULL) +{ + hsa_symbol *symbol = NULL; + hsa_op_reg *reg = NULL; + offset_int offset = 0; + tree origref = ref; + tree varoffset = NULL_TREE; + BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); + HOST_WIDE_INT bitsize = 0, bitpos = 0; + BrigType16_t flat_addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); + + if (TREE_CODE (ref) == STRING_CST) + { + symbol = hsa_get_string_cst_symbol (ref); + goto out; + } + else if (TREE_CODE (ref) == BIT_FIELD_REF + && ((tree_to_uhwi (TREE_OPERAND (ref, 1)) % BITS_PER_UNIT) != 0 + || (tree_to_uhwi (TREE_OPERAND (ref, 2)) % BITS_PER_UNIT) != 0)) + { + HSA_SORRY_ATV (EXPR_LOCATION (origref), + "support for HSA does not implement " + "bit field references such as %E", ref); + goto out; + } + + if (handled_component_p (ref)) + { + enum machine_mode mode; + int unsignedp, volatilep, preversep; + + ref = get_inner_reference (ref, &bitsize, &bitpos, &varoffset, &mode, + &unsignedp, &preversep, &volatilep, false); + + offset = bitpos; + offset = wi::rshift (offset, LOG2_BITS_PER_UNIT, SIGNED); + } + + switch (TREE_CODE (ref)) + { + case ADDR_EXPR: + { + addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE); + symbol = hsa_cfun->create_hsa_temporary (flat_addrtype); + hsa_op_reg *r = new hsa_op_reg (flat_addrtype); + gen_hsa_addr_insns (ref, r, hbb); + hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type, + r, new hsa_op_address (symbol))); + + break; + } + case SSA_NAME: + { + addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE); + symbol = hsa_cfun->create_hsa_temporary (flat_addrtype); + hsa_op_reg *r = hsa_cfun->reg_for_gimple_ssa (ref); + + hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type, + r, new hsa_op_address (symbol))); + + break; + } + case PARM_DECL: + case VAR_DECL: + case RESULT_DECL: + gcc_assert (!symbol); + symbol = get_symbol_for_decl (ref); + addrtype = hsa_get_segment_addr_type (symbol->m_segment); + break; + + case MEM_REF: + process_mem_base (TREE_OPERAND (ref, 0), &symbol, &addrtype, ®, + &offset, hbb); + + if (!integer_zerop (TREE_OPERAND (ref, 1))) + offset += wi::to_offset (TREE_OPERAND (ref, 1)); + break; + + case TARGET_MEM_REF: + process_mem_base (TMR_BASE (ref), &symbol, &addrtype, ®, &offset, hbb); + if (TMR_INDEX (ref)) + { + hsa_op_reg *disp1; + hsa_op_base *idx = hsa_cfun->reg_for_gimple_ssa + (TMR_INDEX (ref))->get_in_type (addrtype, hbb); + if (TMR_STEP (ref) && !integer_onep (TMR_STEP (ref))) + { + disp1 = new hsa_op_reg (addrtype); + hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_MUL, + addrtype); + + /* As step must respect addrtype, we overwrite the type + of an immediate value. 
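+	     TMR_STEP is always an INTEGER_CST, so the immediate can simply
+	     be reinterpreted instead of emitting a conversion instruction.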
*/
+              hsa_op_immed *step = new hsa_op_immed (TMR_STEP (ref));
+              step->m_type = addrtype;
+
+              insn->set_op (0, disp1);
+              insn->set_op (1, idx);
+              insn->set_op (2, step);
+              hbb->append_insn (insn);
+            }
+          else
+            disp1 = as_a <hsa_op_reg *> (idx);
+          reg = add_addr_regs_if_needed (reg, disp1, hbb);
+        }
+      if (TMR_INDEX2 (ref))
+        {
+          hsa_op_base *disp2 = hsa_cfun->reg_for_gimple_ssa
+            (TMR_INDEX2 (ref))->get_in_type (addrtype, hbb);
+          reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (disp2),
+                                         hbb);
+        }
+      offset += wi::to_offset (TMR_OFFSET (ref));
+      break;
+    case FUNCTION_DECL:
+      HSA_SORRY_AT (EXPR_LOCATION (origref),
+                    "support for HSA does not implement function pointers");
+      goto out;
+    default:
+      HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does "
+                     "not implement memory access to %E", origref);
+      goto out;
+    }
+
+  if (varoffset)
+    {
+      if (TREE_CODE (varoffset) == INTEGER_CST)
+        offset += wi::to_offset (varoffset);
+      else
+        {
+          hsa_op_base *off_op = gen_address_calculation (varoffset, hbb,
+                                                         addrtype);
+          reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (off_op),
+                                         hbb);
+        }
+    }
+
+  gcc_checking_assert ((symbol
+                        && addrtype
+                        == hsa_get_segment_addr_type (symbol->m_segment))
+                       || (!symbol
+                           && addrtype
+                           == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)));
+out:
+  HOST_WIDE_INT hwi_offset = offset.to_shwi ();
+
+  /* Calculate the remaining bit offset (if present).  */
+  bitpos %= BITS_PER_UNIT;
+  /* If bitsize is a power of two that is greater than or equal to
+     BITS_PER_UNIT, there is no reason to think this is a bit-field
+     access.  */
+  if (bitpos == 0
+      && (bitsize >= BITS_PER_UNIT)
+      && !(bitsize & (bitsize - 1)))
+    bitsize = 0;
+
+  if ((bitpos || bitsize) && (output_bitpos == NULL || output_bitsize == NULL))
+    HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does not "
+                   "implement unhandled bit field reference such as %E", ref);
+
+  if (output_bitsize != NULL && output_bitpos != NULL)
+    {
+      *output_bitsize = bitsize;
+      *output_bitpos = bitpos;
+    }
+
+  return new hsa_op_address (symbol, reg, hwi_offset);
+}
+
+/* Generate an HSA address for a function call argument of the given TYPE.
+   INDEX is used to generate the corresponding name of the argument.
+   The special value -1 indicates that the result value is created.  */
+
+static hsa_op_address *
+gen_hsa_addr_for_arg (tree tree_type, int index)
+{
+  hsa_symbol *sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
+                                    BRIG_LINKAGE_ARG);
+  sym->m_type = hsa_type_for_tree_type (tree_type, &sym->m_dim);
+
+  if (index == -1) /* Function result.  */
+    sym->m_name = "res";
+  else /* Function call arguments.  */
+    {
+      sym->m_name = NULL;
+      sym->m_name_number = index;
+    }
+
+  return new hsa_op_address (sym);
+}
+
+/* Generate HSA instructions that calculate the address of VAL, including all
+   necessary conversions to flat addressing, and place the result into DEST.
+   Instructions are appended to HBB.  */
+
+static void
+gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb)
+{
+  /* Handle cases like tmp = NULL, where we just emit a move instruction
+     to a register.
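+     A NULL pointer constant is an INTEGER_CST rather than an ADDR_EXPR, so
+     there is no address to compute in that case.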
+     */
+  if (TREE_CODE (val) == INTEGER_CST)
+    {
+      hsa_op_immed *c = new hsa_op_immed (val);
+      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+                                                 dest->m_type, dest, c);
+      hbb->append_insn (insn);
+      return;
+    }
+
+  hsa_op_address *addr;
+
+  gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
+  if (TREE_CODE (val) == ADDR_EXPR)
+    val = TREE_OPERAND (val, 0);
+  addr = gen_hsa_addr (val, hbb);
+  hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA);
+  insn->set_op (1, addr);
+  if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL)
+    {
+      /* LDA produces a segment-relative address, we need to convert
+         it to the flat one.  */
+      hsa_op_reg *tmp;
+      tmp = new hsa_op_reg (hsa_get_segment_addr_type
+                            (addr->m_symbol->m_segment));
+      hsa_insn_seg *seg;
+      seg = new hsa_insn_seg (BRIG_OPCODE_STOF,
+                              hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
+                              tmp->m_type, addr->m_symbol->m_segment, dest,
+                              tmp);
+
+      insn->set_op (0, tmp);
+      insn->m_type = tmp->m_type;
+      hbb->append_insn (insn);
+      hbb->append_insn (seg);
+    }
+  else
+    {
+      insn->set_op (0, dest);
+      insn->m_type = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+      hbb->append_insn (insn);
+    }
+}
+
+/* Return an HSA register or HSA immediate value operand corresponding to
+   gimple operand OP.  */
+
+static hsa_op_with_type *
+hsa_reg_or_immed_for_gimple_op (tree op, hsa_bb *hbb)
+{
+  hsa_op_reg *tmp;
+
+  if (TREE_CODE (op) == SSA_NAME)
+    tmp = hsa_cfun->reg_for_gimple_ssa (op);
+  else if (!POINTER_TYPE_P (TREE_TYPE (op)))
+    return new hsa_op_immed (op);
+  else
+    {
+      tmp = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT));
+      gen_hsa_addr_insns (op, tmp, hbb);
+    }
+  return tmp;
+}
+
+/* Create a simple movement instruction with register destination DEST and
+   register or immediate source SRC and append it to the end of HBB.  */
+
+void
+hsa_build_append_simple_mov (hsa_op_reg *dest, hsa_op_base *src, hsa_bb *hbb)
+{
+  hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type,
+                                             dest, src);
+  if (hsa_op_reg *sreg = dyn_cast <hsa_op_reg *> (src))
+    gcc_assert (hsa_type_bit_size (dest->m_type)
+                == hsa_type_bit_size (sreg->m_type));
+  else
+    gcc_assert (hsa_type_bit_size (dest->m_type)
+                == hsa_type_bit_size (as_a <hsa_op_immed *> (src)->m_type));
+
+  hbb->append_insn (insn);
+}
+
+/* Generate HSAIL instructions loading a bit field into register DEST.
+   VALUE_REG is a register holding the SSA name that is used in the bit-field
+   reference.  BITPOS is the offset of the bit field within the loaded value
+   and BITSIZE is the number of bits of the bit field.
+   Add instructions to HBB.
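+   The field is extracted with a left shift that discards the bits above it,
+   followed by a right shift that moves it down to bit zero.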
+   */
+
+static void
+gen_hsa_insns_for_bitfield (hsa_op_reg *dest, hsa_op_reg *value_reg,
+                            HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
+                            hsa_bb *hbb)
+{
+  unsigned type_bitsize = hsa_type_bit_size (dest->m_type);
+  unsigned left_shift = type_bitsize - (bitsize + bitpos);
+  unsigned right_shift = left_shift + bitpos;
+
+  if (left_shift)
+    {
+      hsa_op_reg *value_reg_2 = new hsa_op_reg (dest->m_type);
+      hsa_op_immed *c = new hsa_op_immed (left_shift, BRIG_TYPE_U32);
+
+      hsa_insn_basic *lshift
+        = new hsa_insn_basic (3, BRIG_OPCODE_SHL, value_reg_2->m_type,
+                              value_reg_2, value_reg, c);
+
+      hbb->append_insn (lshift);
+
+      value_reg = value_reg_2;
+    }
+
+  if (right_shift)
+    {
+      hsa_op_reg *value_reg_2 = new hsa_op_reg (dest->m_type);
+      hsa_op_immed *c = new hsa_op_immed (right_shift, BRIG_TYPE_U32);
+
+      hsa_insn_basic *rshift
+        = new hsa_insn_basic (3, BRIG_OPCODE_SHR, value_reg_2->m_type,
+                              value_reg_2, value_reg, c);
+
+      hbb->append_insn (rshift);
+
+      value_reg = value_reg_2;
+    }
+
+  hsa_insn_basic *assignment
+    = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, dest, value_reg);
+  hbb->append_insn (assignment);
+}
+
+
+/* Generate HSAIL instructions loading a bit field into register DEST.  ADDR
+   is the prepared memory address from which the bit field is loaded.  BITPOS
+   is the offset of the bit field within the loaded value and BITSIZE is the
+   number of bits of the bit field.  Add instructions to HBB.  The load must
+   be performed with alignment ALIGN.  */
+
+static void
+gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, hsa_op_address *addr,
+                                 HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos,
+                                 hsa_bb *hbb, BrigAlignment8_t align)
+{
+  hsa_op_reg *value_reg = new hsa_op_reg (dest->m_type);
+  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, dest->m_type,
+                                        value_reg, addr);
+  mem->set_align (align);
+  hbb->append_insn (mem);
+  gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb);
+}
+
+/* Return the alignment of the base memory accesses we issue to perform the
+   bit-field memory access REF.  */
+
+static BrigAlignment8_t
+hsa_bitmemref_alignment (tree ref)
+{
+  unsigned HOST_WIDE_INT bit_offset = 0;
+
+  while (true)
+    {
+      if (TREE_CODE (ref) == BIT_FIELD_REF)
+        {
+          if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2)))
+            return BRIG_ALIGNMENT_1;
+          bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2));
+        }
+      else if (TREE_CODE (ref) == COMPONENT_REF
+               && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
+        bit_offset += int_bit_position (TREE_OPERAND (ref, 1));
+      else
+        break;
+      ref = TREE_OPERAND (ref, 0);
+    }
+
+  unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT;
+  unsigned HOST_WIDE_INT byte_bits = bit_offset - bits;
+  BrigAlignment8_t base = hsa_alignment_encoding (get_object_alignment (ref));
+  if (byte_bits == 0)
+    return base;
+  return MIN (base, hsa_alignment_encoding (byte_bits & -byte_bits));
+}
+
+/* Generate HSAIL instructions loading something into register DEST.  RHS is
+   the tree representation of the loaded data, which is loaded as type TYPE.
+   Add instructions to HBB.  */
+
+static void
+gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb)
+{
+  /* The destination SSA name will give us the type.
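+     A VIEW_CONVERT_EXPR around the right-hand side does not change the bits
+     being loaded, so it can simply be stripped here.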
+     */
+  if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR)
+    rhs = TREE_OPERAND (rhs, 0);
+
+  if (TREE_CODE (rhs) == SSA_NAME)
+    {
+      hsa_op_reg *src = hsa_cfun->reg_for_gimple_ssa (rhs);
+      hsa_build_append_simple_mov (dest, src, hbb);
+    }
+  else if (is_gimple_min_invariant (rhs)
+           || TREE_CODE (rhs) == ADDR_EXPR)
+    {
+      if (POINTER_TYPE_P (TREE_TYPE (rhs)))
+        {
+          if (dest->m_type != hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT))
+            {
+              HSA_SORRY_ATV (EXPR_LOCATION (rhs),
+                             "support for HSA does not implement conversion "
+                             "of %E to the requested non-pointer type.", rhs);
+              return;
+            }
+
+          gen_hsa_addr_insns (rhs, dest, hbb);
+        }
+      else if (TREE_CODE (rhs) == COMPLEX_CST)
+        {
+          hsa_op_immed *real_part = new hsa_op_immed (TREE_REALPART (rhs));
+          hsa_op_immed *imag_part = new hsa_op_immed (TREE_IMAGPART (rhs));
+
+          hsa_op_reg *real_part_reg
+            = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
+                                                             true));
+          hsa_op_reg *imag_part_reg
+            = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type),
+                                                             true));
+
+          hsa_build_append_simple_mov (real_part_reg, real_part, hbb);
+          hsa_build_append_simple_mov (imag_part_reg, imag_part, hbb);
+
+          BrigType16_t src_type = hsa_bittype_for_type (real_part_reg->m_type);
+
+          hsa_insn_packed *insn
+            = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type,
+                                   src_type, dest, real_part_reg,
+                                   imag_part_reg);
+          hbb->append_insn (insn);
+        }
+      else
+        {
+          hsa_op_immed *imm = new hsa_op_immed (rhs);
+          hsa_build_append_simple_mov (dest, imm, hbb);
+        }
+    }
+  else if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (rhs) == IMAGPART_EXPR)
+    {
+      tree pack_type = TREE_TYPE (TREE_OPERAND (rhs, 0));
+
+      hsa_op_reg *packed_reg
+        = new hsa_op_reg (hsa_type_for_scalar_tree_type (pack_type, true));
+
+      tree complex_rhs = TREE_OPERAND (rhs, 0);
+      gen_hsa_insns_for_load (packed_reg, complex_rhs, TREE_TYPE (complex_rhs),
+                              hbb);
+
+      hsa_op_reg *real_reg
+        = new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
+
+      hsa_op_reg *imag_reg
+        = new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true));
+
+      BrigType16_t brig_type = packed_reg->m_type;
+      hsa_insn_packed *packed
+        = new hsa_insn_packed (3, BRIG_OPCODE_EXPAND,
+                               hsa_bittype_for_type (real_reg->m_type),
+                               brig_type, real_reg, imag_reg, packed_reg);
+
+      hbb->append_insn (packed);
+
+      hsa_op_reg *source = TREE_CODE (rhs) == REALPART_EXPR ?
+        real_reg : imag_reg;
+
+      hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+                                                 dest->m_type, dest, source);
+
+      hbb->append_insn (insn);
+    }
+  else if (TREE_CODE (rhs) == BIT_FIELD_REF
+           && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME)
+    {
+      tree ssa_name = TREE_OPERAND (rhs, 0);
+      HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs, 1));
+      HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs, 2));
+
+      hsa_op_reg *imm_value = hsa_cfun->reg_for_gimple_ssa (ssa_name);
+      gen_hsa_insns_for_bitfield (dest, imm_value, bitsize, bitpos, hbb);
+    }
+  else if (DECL_P (rhs) || TREE_CODE (rhs) == MEM_REF
+           || TREE_CODE (rhs) == TARGET_MEM_REF
+           || handled_component_p (rhs))
+    {
+      HOST_WIDE_INT bitsize, bitpos;
+
+      /* Load from memory.  */
+      hsa_op_address *addr;
+      addr = gen_hsa_addr (rhs, hbb, &bitsize, &bitpos);
+
+      /* Handle load of a bit field.  */
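+      /* Bit fields wider than 64 bits cannot be loaded into a single HSA
+         register, so they are rejected with a sorry message below.  */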
*/ + if (bitsize > 64) + { + HSA_SORRY_AT (EXPR_LOCATION (rhs), + "support for HSA does not implement load from a bit " + "field bigger than 64 bits"); + return; + } + + if (bitsize || bitpos) + gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb, + hsa_bitmemref_alignment (rhs)); + else + { + BrigType16_t mtype; + /* Not dest->m_type, that's possibly extended. */ + mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (type, + false)); + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest, + addr); + mem->set_align (hsa_alignment_encoding (get_object_alignment (rhs))); + hbb->append_insn (mem); + } + } + else + HSA_SORRY_ATV (EXPR_LOCATION (rhs), + "support for HSA does not implement loading " + "of expression %E", + rhs); +} + +/* Return number of bits necessary for representation of a bit field, + starting at BITPOS with size of BITSIZE. */ + +static unsigned +get_bitfield_size (unsigned bitpos, unsigned bitsize) +{ + unsigned s = bitpos + bitsize; + unsigned sizes[] = {8, 16, 32, 64}; + + for (unsigned i = 0; i < 4; i++) + if (s <= sizes[i]) + return sizes[i]; + + gcc_unreachable (); + return 0; +} + +/* Generate HSAIL instructions storing into memory. LHS is the destination of + the store, SRC is the source operand. Add instructions to HBB. */ + +static void +gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb) +{ + HOST_WIDE_INT bitsize = 0, bitpos = 0; + BrigAlignment8_t req_align; + BrigType16_t mtype; + mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), + false)); + hsa_op_address *addr; + addr = gen_hsa_addr (lhs, hbb, &bitsize, &bitpos); + + /* Handle store to a bit field. */ + if (bitsize > 64) + { + HSA_SORRY_AT (EXPR_LOCATION (lhs), + "support for HSA does not implement store to a bit field " + "bigger than 64 bits"); + return; + } + + unsigned type_bitsize = get_bitfield_size (bitpos, bitsize); + + /* HSAIL does not support MOV insn with 16-bits integers. */ + if (type_bitsize < 32) + type_bitsize = 32; + + if (bitpos || (bitsize && type_bitsize != bitsize)) + { + unsigned HOST_WIDE_INT mask = 0; + BrigType16_t mem_type + = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, + !TYPE_UNSIGNED (TREE_TYPE (lhs))); + + for (unsigned i = 0; i < type_bitsize; i++) + if (i < bitpos || i >= bitpos + bitsize) + mask |= ((unsigned HOST_WIDE_INT)1 << i); + + hsa_op_reg *value_reg = new hsa_op_reg (mem_type); + + req_align = hsa_bitmemref_alignment (lhs); + /* Load value from memory. */ + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type, + value_reg, addr); + mem->set_align (req_align); + hbb->append_insn (mem); + + /* AND the loaded value with prepared mask. */ + hsa_op_reg *cleared_reg = new hsa_op_reg (mem_type); + + BrigType16_t t + = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, false); + hsa_op_immed *c = new hsa_op_immed (mask, t); + + hsa_insn_basic *clearing + = new hsa_insn_basic (3, BRIG_OPCODE_AND, mem_type, cleared_reg, + value_reg, c); + hbb->append_insn (clearing); + + /* Shift to left a value that is going to be stored. 
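+     For the same example of a 5-bit field at bit position 3 of a 32-bit
+     word, the mask prepared above is 0xffffff07, so the AND clears just
+     the field and the OR below merges in the new value after it has been
+     shifted left by 3.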
*/ + hsa_op_reg *new_value_reg = new hsa_op_reg (mem_type); + + hsa_insn_basic *basic = new hsa_insn_basic (2, BRIG_OPCODE_MOV, mem_type, + new_value_reg, src); + hbb->append_insn (basic); + + if (bitpos) + { + hsa_op_reg *shifted_value_reg = new hsa_op_reg (mem_type); + c = new hsa_op_immed (bitpos, BRIG_TYPE_U32); + + hsa_insn_basic *basic + = new hsa_insn_basic (3, BRIG_OPCODE_SHL, mem_type, + shifted_value_reg, new_value_reg, c); + hbb->append_insn (basic); + + new_value_reg = shifted_value_reg; + } + + /* OR the prepared value with prepared chunk loaded from memory. */ + hsa_op_reg *prepared_reg= new hsa_op_reg (mem_type); + basic = new hsa_insn_basic (3, BRIG_OPCODE_OR, mem_type, prepared_reg, + new_value_reg, cleared_reg); + hbb->append_insn (basic); + + src = prepared_reg; + mtype = mem_type; + } + else + req_align = hsa_alignment_encoding (get_object_alignment (lhs)); + + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr); + mem->set_align (req_align); + + /* The HSAIL verifier has another constraint: if the source is an immediate + then it must match the destination type. If it's a register the low bits + will be used for sub-word stores. We're always allocating new operands so + we can modify the above in place. */ + if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src)) + { + if ((imm->m_type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_NONE) + imm->m_type = mem->m_type; + else + { + /* ...and all vector immediates apparently need to be vectors of + unsigned bytes. */ + unsigned bs = hsa_type_bit_size (imm->m_type); + gcc_assert (bs == hsa_type_bit_size (mem->m_type)); + switch (bs) + { + case 32: + imm->m_type = BRIG_TYPE_U8X4; + break; + case 64: + imm->m_type = BRIG_TYPE_U8X8; + break; + case 128: + imm->m_type = BRIG_TYPE_U8X16; + break; + default: + gcc_unreachable (); + } + } + } + + hbb->append_insn (mem); +} + +/* Generate memory copy instructions that are going to be used + for copying a HSA symbol SRC_SYMBOL (or SRC_REG) to TARGET memory, + represented by pointer in a register. */ + +static void +gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src, + unsigned size) +{ + hsa_op_address *addr; + hsa_insn_mem *mem; + + unsigned offset = 0; + + while (size) + { + unsigned s; + if (size >= 8) + s = 8; + else if (size >= 4) + s = 4; + else if (size >= 2) + s = 2; + else + s = 1; + + BrigType16_t t = get_integer_type_by_bytes (s, false); + + hsa_op_reg *tmp = new hsa_op_reg (t); + addr = new hsa_op_address (src->m_symbol, src->m_reg, + src->m_imm_offset + offset); + mem = new hsa_insn_mem (BRIG_OPCODE_LD, t, tmp, addr); + hbb->append_insn (mem); + + addr = new hsa_op_address (target->m_symbol, target->m_reg, + target->m_imm_offset + offset); + mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, tmp, addr); + hbb->append_insn (mem); + offset += s; + size -= s; + } +} + +/* Create a memset mask that is created by copying a CONSTANT byte value + to an integer of BYTE_SIZE bytes. */ + +static unsigned HOST_WIDE_INT +build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size) +{ + if (constant == 0) + return 0; + + HOST_WIDE_INT v = constant; + + for (unsigned i = 1; i < byte_size; i++) + v |= constant << (8 * i); + + return v; +} + +/* Generate memory set instructions that are going to be used + for setting a CONSTANT byte value to TARGET memory of SIZE bytes. 
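+   For instance, CONSTANT 0xab with SIZE 7 is emitted as a four-byte store
+   of 0xabababab, a two-byte store of 0xabab and a one-byte store of 0xab,
+   chunked the same way as in gen_hsa_memory_copy above.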
*/ + +static void +gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, + unsigned HOST_WIDE_INT constant, + unsigned size) +{ + hsa_op_address *addr; + hsa_insn_mem *mem; + + unsigned offset = 0; + + while (size) + { + unsigned s; + if (size >= 8) + s = 8; + else if (size >= 4) + s = 4; + else if (size >= 2) + s = 2; + else + s = 1; + + addr = new hsa_op_address (target->m_symbol, target->m_reg, + target->m_imm_offset + offset); + + BrigType16_t t = get_integer_type_by_bytes (s, false); + HOST_WIDE_INT c = build_memset_value (constant, s); + + mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (c, t), + addr); + hbb->append_insn (mem); + offset += s; + size -= s; + } +} + +/* Generate HSAIL instructions for a single assignment + of an empty constructor to an ADDR_LHS. Constructor is passed as a + tree RHS and all instructions are appended to HBB. */ + +void +gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb) +{ + if (vec_safe_length (CONSTRUCTOR_ELTS (rhs))) + { + HSA_SORRY_AT (EXPR_LOCATION (rhs), + "support for HSA does not implement load from constructor"); + return; + } + + unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); + gen_hsa_memory_set (hbb, addr_lhs, 0, size); +} + +/* Generate HSA instructions for a single assignment of RHS to LHS. + HBB is the basic block they will be appended to. */ + +static void +gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb) +{ + if (TREE_CODE (lhs) == SSA_NAME) + { + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + if (hsa_seen_error ()) + return; + + gen_hsa_insns_for_load (dest, rhs, TREE_TYPE (lhs), hbb); + } + else if (TREE_CODE (rhs) == SSA_NAME + || (is_gimple_min_invariant (rhs) && TREE_CODE (rhs) != STRING_CST)) + { + /* Store to memory. */ + hsa_op_base *src = hsa_reg_or_immed_for_gimple_op (rhs, hbb); + if (hsa_seen_error ()) + return; + + gen_hsa_insns_for_store (lhs, src, hbb); + } + else + { + hsa_op_address *addr_lhs = gen_hsa_addr (lhs, hbb); + + if (TREE_CODE (rhs) == CONSTRUCTOR) + gen_hsa_ctor_assignment (addr_lhs, rhs, hbb); + else + { + hsa_op_address *addr_rhs = gen_hsa_addr (rhs, hbb); + + unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); + gen_hsa_memory_copy (hbb, addr_lhs, addr_rhs, size); + } + } +} + +/* Prepend before INSN a load from spill symbol of SPILL_REG. Return the + register into which we loaded. If this required another register to convert + from a B1 type, return it in *PTMP2, otherwise store NULL into it. We + assume we are out of SSA so the returned register does not have its + definition set. */ + +hsa_op_reg * +hsa_spill_in (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2) +{ + hsa_symbol *spill_sym = spill_reg->m_spill_sym; + hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type); + hsa_op_address *addr = new hsa_op_address (spill_sym); + + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, spill_sym->m_type, + reg, addr); + hsa_insert_insn_before (mem, insn); + + *ptmp2 = NULL; + if (spill_reg->m_type == BRIG_TYPE_B1) + { + hsa_insn_basic *cvtinsn; + *ptmp2 = reg; + reg = new hsa_op_reg (spill_reg->m_type); + + cvtinsn = new hsa_insn_cvt (reg, *ptmp2); + hsa_insert_insn_before (cvtinsn, insn); + } + return reg; +} + +/* Append after INSN a store to spill symbol of SPILL_REG. Return the register + from which we stored. If this required another register to convert to a B1 + type, return it in *PTMP2, otherwise store NULL into it. 
We assume we are + out of SSA so the returned register does not have its use updated. */ + +hsa_op_reg * +hsa_spill_out (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2) +{ + hsa_symbol *spill_sym = spill_reg->m_spill_sym; + hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type); + hsa_op_address *addr = new hsa_op_address (spill_sym); + hsa_op_reg *returnreg; + + *ptmp2 = NULL; + returnreg = reg; + if (spill_reg->m_type == BRIG_TYPE_B1) + { + hsa_insn_basic *cvtinsn; + *ptmp2 = new hsa_op_reg (spill_sym->m_type); + reg->m_type = spill_reg->m_type; + + cvtinsn = new hsa_insn_cvt (*ptmp2, returnreg); + hsa_append_insn_after (cvtinsn, insn); + insn = cvtinsn; + reg = *ptmp2; + } + + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, spill_sym->m_type, reg, + addr); + hsa_append_insn_after (mem, insn); + return returnreg; +} + +/* Generate a comparison instruction that will compare LHS and RHS with + comparison specified by CODE and put result into register DEST. DEST has to + have its type set already but must not have its definition set yet. + Generated instructions will be added to HBB. */ + +static void +gen_hsa_cmp_insn_from_gimple (enum tree_code code, tree lhs, tree rhs, + hsa_op_reg *dest, hsa_bb *hbb) +{ + BrigCompareOperation8_t compare; + + switch (code) + { + case LT_EXPR: + compare = BRIG_COMPARE_LT; + break; + case LE_EXPR: + compare = BRIG_COMPARE_LE; + break; + case GT_EXPR: + compare = BRIG_COMPARE_GT; + break; + case GE_EXPR: + compare = BRIG_COMPARE_GE; + break; + case EQ_EXPR: + compare = BRIG_COMPARE_EQ; + break; + case NE_EXPR: + compare = BRIG_COMPARE_NE; + break; + case UNORDERED_EXPR: + compare = BRIG_COMPARE_NAN; + break; + case ORDERED_EXPR: + compare = BRIG_COMPARE_NUM; + break; + case UNLT_EXPR: + compare = BRIG_COMPARE_LTU; + break; + case UNLE_EXPR: + compare = BRIG_COMPARE_LEU; + break; + case UNGT_EXPR: + compare = BRIG_COMPARE_GTU; + break; + case UNGE_EXPR: + compare = BRIG_COMPARE_GEU; + break; + case UNEQ_EXPR: + compare = BRIG_COMPARE_EQU; + break; + case LTGT_EXPR: + compare = BRIG_COMPARE_NEU; + break; + + default: + HSA_SORRY_ATV (EXPR_LOCATION (lhs), + "support for HSA does not implement comparison tree " + "code %s\n", get_tree_code_name (code)); + return; + } + + /* CMP instruction returns e.g. 0xffffffff (for a 32-bit with integer) + as a result of comparison. */ + + BrigType16_t dest_type = hsa_type_integer_p (dest->m_type) + ? (BrigType16_t) BRIG_TYPE_B1 : dest->m_type; + + hsa_insn_cmp *cmp = new hsa_insn_cmp (compare, dest_type); + cmp->set_op (1, hsa_reg_or_immed_for_gimple_op (lhs, hbb)); + cmp->set_op (2, hsa_reg_or_immed_for_gimple_op (rhs, hbb)); + + hbb->append_insn (cmp); + cmp->set_output_in_type (dest, 0, hbb); +} + +/* Generate an unary instruction with OPCODE and append it to a basic block + HBB. The instruction uses DEST as a destination and OP1 + as a single operand. 
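+   A MOV is silently turned into a CVT when the destination and operand
+   types require a conversion, and FIRSTBIT/LASTBIT produce their result
+   in a U32 register that is then copied into the type of DEST.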
*/ + +static void +gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest, + hsa_op_with_type *op1, hsa_bb *hbb) +{ + gcc_checking_assert (dest); + hsa_insn_basic *insn; + + if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type)) + insn = new hsa_insn_cvt (dest, op1); + else if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) + insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, op1->m_type, NULL, + op1); + else + { + insn = new hsa_insn_basic (2, opcode, dest->m_type, dest, op1); + + if (opcode == BRIG_OPCODE_ABS || opcode == BRIG_OPCODE_NEG) + { + /* ABS and NEG only exist in _s form :-/ */ + if (insn->m_type == BRIG_TYPE_U32) + insn->m_type = BRIG_TYPE_S32; + else if (insn->m_type == BRIG_TYPE_U64) + insn->m_type = BRIG_TYPE_S64; + } + } + + hbb->append_insn (insn); + + if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) + insn->set_output_in_type (dest, 0, hbb); +} + +/* Generate a binary instruction with OPCODE and append it to a basic block + HBB. The instruction uses DEST as a destination and operands OP1 + and OP2. */ + +static void +gen_hsa_binary_operation (int opcode, hsa_op_reg *dest, + hsa_op_base *op1, hsa_op_base *op2, hsa_bb *hbb) +{ + gcc_checking_assert (dest); + + if ((opcode == BRIG_OPCODE_SHL || opcode == BRIG_OPCODE_SHR) + && is_a <hsa_op_immed *> (op2)) + { + hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2); + i->set_type (BRIG_TYPE_U32); + } + if ((opcode == BRIG_OPCODE_OR + || opcode == BRIG_OPCODE_XOR + || opcode == BRIG_OPCODE_AND) + && is_a <hsa_op_immed *> (op2)) + { + hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2); + i->set_type (hsa_uint_for_bitsize (hsa_type_bit_size (i->m_type))); + } + + hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, dest->m_type, dest, + op1, op2); + hbb->append_insn (insn); +} + +/* Generate HSA instructions for a single assignment. HBB is the basic block + they will be appended to. */ + +static void +gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb) +{ + tree_code code = gimple_assign_rhs_code (assign); + gimple_rhs_class rhs_class = get_gimple_rhs_class (gimple_expr_code (assign)); + + tree lhs = gimple_assign_lhs (assign); + tree rhs1 = gimple_assign_rhs1 (assign); + tree rhs2 = gimple_assign_rhs2 (assign); + tree rhs3 = gimple_assign_rhs3 (assign); + + BrigOpcode opcode; + + switch (code) + { + CASE_CONVERT: + case FLOAT_EXPR: + /* The opcode is changed to BRIG_OPCODE_CVT if BRIG types + needs a conversion. 
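+	 E.g. an integer-to-float conversion is then emitted as a cvt
+	 instruction by gen_hsa_unary_operation, whereas a copy between
+	 types with the same representation stays a plain mov.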
*/ + opcode = BRIG_OPCODE_MOV; + break; + + case PLUS_EXPR: + case POINTER_PLUS_EXPR: + opcode = BRIG_OPCODE_ADD; + break; + case MINUS_EXPR: + opcode = BRIG_OPCODE_SUB; + break; + case MULT_EXPR: + opcode = BRIG_OPCODE_MUL; + break; + case MULT_HIGHPART_EXPR: + opcode = BRIG_OPCODE_MULHI; + break; + case RDIV_EXPR: + case TRUNC_DIV_EXPR: + case EXACT_DIV_EXPR: + opcode = BRIG_OPCODE_DIV; + break; + case CEIL_DIV_EXPR: + case FLOOR_DIV_EXPR: + case ROUND_DIV_EXPR: + HSA_SORRY_AT (gimple_location (assign), + "support for HSA does not implement CEIL_DIV_EXPR, " + "FLOOR_DIV_EXPR or ROUND_DIV_EXPR"); + return; + case TRUNC_MOD_EXPR: + opcode = BRIG_OPCODE_REM; + break; + case CEIL_MOD_EXPR: + case FLOOR_MOD_EXPR: + case ROUND_MOD_EXPR: + HSA_SORRY_AT (gimple_location (assign), + "support for HSA does not implement CEIL_MOD_EXPR, " + "FLOOR_MOD_EXPR or ROUND_MOD_EXPR"); + return; + case NEGATE_EXPR: + opcode = BRIG_OPCODE_NEG; + break; + case MIN_EXPR: + opcode = BRIG_OPCODE_MIN; + break; + case MAX_EXPR: + opcode = BRIG_OPCODE_MAX; + break; + case ABS_EXPR: + opcode = BRIG_OPCODE_ABS; + break; + case LSHIFT_EXPR: + opcode = BRIG_OPCODE_SHL; + break; + case RSHIFT_EXPR: + opcode = BRIG_OPCODE_SHR; + break; + case LROTATE_EXPR: + case RROTATE_EXPR: + { + hsa_insn_basic *insn = NULL; + int code1 = code == LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR; + int code2 = code != LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR; + BrigType16_t btype = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), + true); + + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + hsa_op_reg *op1 = new hsa_op_reg (btype); + hsa_op_reg *op2 = new hsa_op_reg (btype); + hsa_op_with_type *shift1 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); + + tree type = TREE_TYPE (rhs2); + unsigned HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (TYPE_SIZE (type)); + + hsa_op_with_type *shift2 = NULL; + if (TREE_CODE (rhs2) == INTEGER_CST) + shift2 = new hsa_op_immed (bitsize - tree_to_uhwi (rhs2), + BRIG_TYPE_U32); + else if (TREE_CODE (rhs2) == SSA_NAME) + { + hsa_op_reg *s = hsa_cfun->reg_for_gimple_ssa (rhs2); + hsa_op_reg *d = new hsa_op_reg (s->m_type); + hsa_op_immed *size_imm = new hsa_op_immed (bitsize, BRIG_TYPE_U32); + + insn = new hsa_insn_basic (3, BRIG_OPCODE_SUB, d->m_type, + d, s, size_imm); + hbb->append_insn (insn); + + shift2 = d; + } + else + gcc_unreachable (); + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + gen_hsa_binary_operation (code1, op1, src, shift1, hbb); + gen_hsa_binary_operation (code2, op2, src, shift2, hbb); + gen_hsa_binary_operation (BRIG_OPCODE_OR, dest, op1, op2, hbb); + + return; + } + case BIT_IOR_EXPR: + opcode = BRIG_OPCODE_OR; + break; + case BIT_XOR_EXPR: + opcode = BRIG_OPCODE_XOR; + break; + case BIT_AND_EXPR: + opcode = BRIG_OPCODE_AND; + break; + case BIT_NOT_EXPR: + opcode = BRIG_OPCODE_NOT; + break; + case FIX_TRUNC_EXPR: + { + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + hsa_op_with_type *v = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + if (hsa_needs_cvt (dest->m_type, v->m_type)) + { + hsa_op_reg *tmp = new hsa_op_reg (v->m_type); + + hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC, + tmp->m_type, tmp, v); + hbb->append_insn (insn); + + hsa_insn_basic *cvtinsn = new hsa_insn_cvt (dest, tmp); + hbb->append_insn (cvtinsn); + } + else + { + hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC, + dest->m_type, dest, v); + hbb->append_insn (insn); + } + + return; + } + opcode = BRIG_OPCODE_TRUNC; + break; + + case LT_EXPR: 
+    case LE_EXPR:
+    case GT_EXPR:
+    case GE_EXPR:
+    case EQ_EXPR:
+    case NE_EXPR:
+    case UNORDERED_EXPR:
+    case ORDERED_EXPR:
+    case UNLT_EXPR:
+    case UNLE_EXPR:
+    case UNGT_EXPR:
+    case UNGE_EXPR:
+    case UNEQ_EXPR:
+    case LTGT_EXPR:
+      {
+	hsa_op_reg *dest
+	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
+
+	gen_hsa_cmp_insn_from_gimple (code, rhs1, rhs2, dest, hbb);
+	return;
+      }
+    case COND_EXPR:
+      {
+	hsa_op_reg *dest
+	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
+	hsa_op_with_type *ctrl = NULL;
+	tree cond = rhs1;
+
+	if (CONSTANT_CLASS_P (cond) || TREE_CODE (cond) == SSA_NAME)
+	  ctrl = hsa_reg_or_immed_for_gimple_op (cond, hbb);
+	else
+	  {
+	    hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1);
+
+	    gen_hsa_cmp_insn_from_gimple (TREE_CODE (cond),
+					  TREE_OPERAND (cond, 0),
+					  TREE_OPERAND (cond, 1),
+					  r, hbb);
+
+	    ctrl = r;
+	  }
+
+	hsa_op_with_type *rhs2_reg = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
+	hsa_op_with_type *rhs3_reg = hsa_reg_or_immed_for_gimple_op (rhs3, hbb);
+
+	BrigType16_t btype = hsa_bittype_for_type (dest->m_type);
+	hsa_op_reg *tmp = new hsa_op_reg (btype);
+
+	rhs2_reg->m_type = btype;
+	rhs3_reg->m_type = btype;
+
+	hsa_insn_basic *insn
+	  = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, tmp->m_type, tmp, ctrl,
+				rhs2_reg, rhs3_reg);
+
+	hbb->append_insn (insn);
+
+	/* Because the operands of a CMOV insn must be Bx types, we have to
+	   emit a conversion insn to move the result into the real
+	   destination type.  */
+	hsa_insn_basic *mov = new hsa_insn_basic (2, BRIG_OPCODE_MOV,
+						  dest->m_type, dest, tmp);
+	hbb->append_insn (mov);
+
+	return;
+      }
+    case COMPLEX_EXPR:
+      {
+	hsa_op_reg *dest
+	  = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
+	hsa_op_with_type *rhs1_reg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
+	hsa_op_with_type *rhs2_reg = hsa_reg_or_immed_for_gimple_op (rhs2, hbb);
+
+	if (hsa_seen_error ())
+	  return;
+
+	BrigType16_t src_type = hsa_bittype_for_type (rhs1_reg->m_type);
+	rhs1_reg = rhs1_reg->get_in_type (src_type, hbb);
+	rhs2_reg = rhs2_reg->get_in_type (src_type, hbb);
+
+	hsa_insn_packed *insn
+	  = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type, src_type,
+				 dest, rhs1_reg, rhs2_reg);
+	hbb->append_insn (insn);
+
+	return;
+      }
+    default:
+      /* Implement others as we come across them.  */
+      HSA_SORRY_ATV (gimple_location (assign),
+		     "support for HSA does not implement operation %s",
+		     get_tree_code_name (code));
+      return;
+    }
+
+
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign));
+
+  hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
+  hsa_op_with_type *op2
+    = rhs2 != NULL_TREE ? hsa_reg_or_immed_for_gimple_op (rhs2, hbb) : NULL;
+
+  if (hsa_seen_error ())
+    return;
+
+  switch (rhs_class)
+    {
+    case GIMPLE_TERNARY_RHS:
+      gcc_unreachable ();
+      return;
+
+    case GIMPLE_BINARY_RHS:
+      gen_hsa_binary_operation (opcode, dest, op1, op2, hbb);
+      break;
+    case GIMPLE_UNARY_RHS:
+      gen_hsa_unary_operation (opcode, dest, op1, hbb);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Generate HSA instructions for a given gimple condition statement COND.
+   Instructions will be appended to HBB, which also needs to be the
+   corresponding structure to the basic_block of COND.
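+   For example, the gimple condition 'if (i_1 < n_2)' is emitted as a cmp_lt
+   producing a B1 control register that the cbr instruction consumes; the
+   branch targets themselves come from the outgoing CFG edges of the block.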
*/ + +static void +gen_hsa_insns_for_cond_stmt (gimple *cond, hsa_bb *hbb) +{ + hsa_op_reg *ctrl = new hsa_op_reg (BRIG_TYPE_B1); + hsa_insn_br *cbr; + + gen_hsa_cmp_insn_from_gimple (gimple_cond_code (cond), + gimple_cond_lhs (cond), + gimple_cond_rhs (cond), + ctrl, hbb); + + cbr = new hsa_insn_br (ctrl); + hbb->append_insn (cbr); +} + +/* Maximum number of elements in a jump table for an HSA SBR instruction. */ + +#define HSA_MAXIMUM_SBR_LABELS 16 + +/* Return lowest value of a switch S that is handled in a non-default + label. */ + +static tree +get_switch_low (gswitch *s) +{ + unsigned labels = gimple_switch_num_labels (s); + gcc_checking_assert (labels >= 1); + + return CASE_LOW (gimple_switch_label (s, 1)); +} + +/* Return highest value of a switch S that is handled in a non-default + label. */ + +static tree +get_switch_high (gswitch *s) +{ + unsigned labels = gimple_switch_num_labels (s); + + /* Compare last label to maximum number of labels. */ + tree label = gimple_switch_label (s, labels - 1); + tree low = CASE_LOW (label); + tree high = CASE_HIGH (label); + + return high != NULL_TREE ? high : low; +} + +static tree +get_switch_size (gswitch *s) +{ + return int_const_binop (MINUS_EXPR, get_switch_high (s), get_switch_low (s)); +} + +/* Generate HSA instructions for a given gimple switch. + Instructions will be appended to HBB. */ + +static void +gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb) +{ + function *func = DECL_STRUCT_FUNCTION (current_function_decl); + tree index_tree = gimple_switch_index (s); + tree lowest = get_switch_low (s); + + hsa_op_reg *index = hsa_cfun->reg_for_gimple_ssa (index_tree); + hsa_op_reg *sub_index = new hsa_op_reg (index->m_type); + hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_SUB, sub_index->m_type, + sub_index, index, + new hsa_op_immed (lowest))); + + hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb); + sub_index = as_a <hsa_op_reg *> (tmp); + unsigned labels = gimple_switch_num_labels (s); + unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s)); + + hsa_insn_sbr *sbr = new hsa_insn_sbr (sub_index, size + 1); + tree default_label = gimple_switch_default_label (s); + basic_block default_label_bb = label_to_block_fn (func, + CASE_LABEL (default_label)); + + sbr->m_default_bb = default_label_bb; + + /* Prepare array with default label destination. */ + for (unsigned HOST_WIDE_INT i = 0; i <= size; i++) + sbr->m_jump_table.safe_push (default_label_bb); + + /* Iterate all labels and fill up the jump table. */ + for (unsigned i = 1; i < labels; i++) + { + tree label = gimple_switch_label (s, i); + basic_block bb = label_to_block_fn (func, CASE_LABEL (label)); + + unsigned HOST_WIDE_INT sub_low + = tree_to_uhwi (int_const_binop (MINUS_EXPR, CASE_LOW (label), lowest)); + + unsigned HOST_WIDE_INT sub_high = sub_low; + tree high = CASE_HIGH (label); + if (high != NULL) + sub_high = tree_to_uhwi (int_const_binop (MINUS_EXPR, high, lowest)); + + for (unsigned HOST_WIDE_INT j = sub_low; j <= sub_high; j++) + sbr->m_jump_table[j] = bb; + } + + hbb->append_insn (sbr); +} + +/* Verify that the function DECL can be handled by HSA. 
*/ + +static void +verify_function_arguments (tree decl) +{ + if (DECL_STATIC_CHAIN (decl)) + { + HSA_SORRY_ATV (EXPR_LOCATION (decl), + "HSA does not support nested functions: %D", decl); + return; + } + else if (!TYPE_ARG_TYPES (TREE_TYPE (decl))) + { + HSA_SORRY_ATV (EXPR_LOCATION (decl), + "HSA does not support functions with variadic arguments " + "(or unknown return type): %D", decl); + return; + } +} + +/* Return BRIG type for FORMAL_ARG_TYPE. If the formal argument type is NULL, + return ACTUAL_ARG_TYPE. */ + +static BrigType16_t +get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type) +{ + if (formal_arg_type == NULL) + return actual_arg_type; + + BrigType16_t decl_type + = hsa_type_for_scalar_tree_type (formal_arg_type, false); + return mem_type_for_type (decl_type); +} + +/* Generate HSA instructions for a direct call instruction. + Instructions will be appended to HBB, which also needs to be the + corresponding structure to the basic_block of STMT. */ + +static void +gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb) +{ + tree decl = gimple_call_fndecl (stmt); + verify_function_arguments (decl); + if (hsa_seen_error ()) + return; + + hsa_insn_call *call_insn = new hsa_insn_call (decl); + hsa_cfun->m_called_functions.safe_push (call_insn->m_called_function); + + /* Argument block start. */ + hsa_insn_arg_block *arg_start + = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn); + hbb->append_insn (arg_start); + + tree parm_type_chain = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); + + /* Preparation of arguments that will be passed to function. */ + const unsigned args = gimple_call_num_args (stmt); + for (unsigned i = 0; i < args; ++i) + { + tree parm = gimple_call_arg (stmt, (int)i); + tree parm_decl_type = parm_type_chain != NULL_TREE + ? TREE_VALUE (parm_type_chain) : NULL_TREE; + hsa_op_address *addr; + + if (AGGREGATE_TYPE_P (TREE_TYPE (parm))) + { + addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i); + hsa_op_address *src = gen_hsa_addr (parm, hbb); + gen_hsa_memory_copy (hbb, addr, src, + addr->m_symbol->total_byte_size ()); + } + else + { + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb); + + if (parm_decl_type != NULL && AGGREGATE_TYPE_P (parm_decl_type)) + { + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement an aggregate " + "formal argument in a function call, while actual " + "argument is not an aggregate"); + return; + } + + BrigType16_t formal_arg_type + = get_format_argument_type (parm_decl_type, src->m_type); + if (hsa_seen_error ()) + return; + + if (src->m_type != formal_arg_type) + src = src->get_in_type (formal_arg_type, hbb); + + addr + = gen_hsa_addr_for_arg (parm_decl_type != NULL_TREE ? + parm_decl_type: TREE_TYPE (parm), i); + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, formal_arg_type, + src, addr); + + hbb->append_insn (mem); + } + + call_insn->m_input_args.safe_push (addr->m_symbol); + if (parm_type_chain) + parm_type_chain = TREE_CHAIN (parm_type_chain); + } + + call_insn->m_args_code_list = new hsa_op_code_list (args); + hbb->append_insn (call_insn); + + tree result_type = TREE_TYPE (TREE_TYPE (decl)); + + tree result = gimple_call_lhs (stmt); + hsa_insn_mem *result_insn = NULL; + if (!VOID_TYPE_P (result_type)) + { + hsa_op_address *addr = gen_hsa_addr_for_arg (result_type, -1); + + /* Even if result of a function call is unused, we have to emit + declaration for the result. 
*/ + if (result) + { + tree lhs_type = TREE_TYPE (result); + + if (hsa_seen_error ()) + return; + + if (AGGREGATE_TYPE_P (lhs_type)) + { + hsa_op_address *result_addr = gen_hsa_addr (result, hbb); + gen_hsa_memory_copy (hbb, result_addr, addr, + addr->m_symbol->total_byte_size ()); + } + else + { + BrigType16_t mtype + = mem_type_for_type (hsa_type_for_scalar_tree_type (lhs_type, + false)); + + hsa_op_reg *dst = hsa_cfun->reg_for_gimple_ssa (result); + result_insn = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dst, addr); + hbb->append_insn (result_insn); + } + } + + call_insn->m_output_arg = addr->m_symbol; + call_insn->m_result_code_list = new hsa_op_code_list (1); + } + else + { + if (result) + { + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement an assignment of " + "return value from a void function"); + return; + } + + call_insn->m_result_code_list = new hsa_op_code_list (0); + } + + /* Argument block end. */ + hsa_insn_arg_block *arg_end + = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); + hbb->append_insn (arg_end); +} + +/* Generate HSA instructions for a direct call of an internal fn. + Instructions will be appended to HBB, which also needs to be the + corresponding structure to the basic_block of STMT. */ + +static void +gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (stmt); + if (!lhs) + return; + + tree lhs_type = TREE_TYPE (lhs); + tree rhs1 = gimple_call_arg (stmt, 0); + tree rhs1_type = TREE_TYPE (rhs1); + enum internal_fn fn = gimple_call_internal_fn (stmt); + hsa_internal_fn *ifn + = new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type))); + hsa_insn_call *call_insn = new hsa_insn_call (ifn); + + gcc_checking_assert (FLOAT_TYPE_P (rhs1_type)); + + if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn)) + hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn); + + hsa_insn_arg_block *arg_start + = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn); + hbb->append_insn (arg_start); + + unsigned num_args = gimple_call_num_args (stmt); + + /* Function arguments. */ + for (unsigned i = 0; i < num_args; i++) + { + tree parm = gimple_call_arg (stmt, (int)i); + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb); + + hsa_op_address *addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i); + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, src->m_type, + src, addr); + + call_insn->m_input_args.safe_push (addr->m_symbol); + hbb->append_insn (mem); + } + + call_insn->m_args_code_list = new hsa_op_code_list (num_args); + hbb->append_insn (call_insn); + + /* Assign returned value. */ + hsa_op_address *addr = gen_hsa_addr_for_arg (lhs_type, -1); + + call_insn->m_output_arg = addr->m_symbol; + call_insn->m_result_code_list = new hsa_op_code_list (1); + + /* Argument block end. */ + hsa_insn_arg_block *arg_end + = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); + hbb->append_insn (arg_end); +} + +/* Generate HSA instructions for a return value instruction. + Instructions will be appended to HBB, which also needs to be the + corresponding structure to the basic_block of STMT. 
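+   Scalar return values are converted to the memory type of the declared
+   result and stored into the function's output argument symbol, aggregates
+   are copied into it byte-wise; only then is the HSAIL ret emitted.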
*/ + +static void +gen_hsa_insns_for_return (greturn *stmt, hsa_bb *hbb) +{ + tree retval = gimple_return_retval (stmt); + if (retval) + { + hsa_op_address *addr = new hsa_op_address (hsa_cfun->m_output_arg); + + if (AGGREGATE_TYPE_P (TREE_TYPE (retval))) + { + hsa_op_address *retval_addr = gen_hsa_addr (retval, hbb); + gen_hsa_memory_copy (hbb, addr, retval_addr, + hsa_cfun->m_output_arg->total_byte_size ()); + } + else + { + BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (retval), + false); + BrigType16_t mtype = mem_type_for_type (t); + + /* Store of return value. */ + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (retval, hbb); + src = src->get_in_type (mtype, hbb); + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, + addr); + hbb->append_insn (mem); + } + } + + /* HSAIL return instruction emission. */ + hsa_insn_basic *ret = new hsa_insn_basic (0, BRIG_OPCODE_RET); + hbb->append_insn (ret); +} + +/* Set OP_INDEX-th operand of the instruction to DEST, as the DEST + can have a different type, conversion instructions are possibly + appended to HBB. */ + +void +hsa_insn_basic::set_output_in_type (hsa_op_reg *dest, unsigned op_index, + hsa_bb *hbb) +{ + hsa_insn_basic *insn; + gcc_checking_assert (op_output_p (op_index)); + + if (dest->m_type == m_type) + { + set_op (op_index, dest); + return; + } + + hsa_op_reg *tmp = new hsa_op_reg (m_type); + set_op (op_index, tmp); + + if (hsa_needs_cvt (dest->m_type, m_type)) + insn = new hsa_insn_cvt (dest, tmp); + else + insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, + dest, tmp->get_in_type (dest->m_type, hbb)); + + hbb->append_insn (insn); +} + +/* Generate instruction OPCODE to query a property of HSA grid along the + given DIMENSION. Store result into DEST and append the instruction to + HBB. */ + +static void +query_hsa_grid (hsa_op_reg *dest, BrigType16_t opcode, int dimension, + hsa_bb *hbb) +{ + /* We're using just one-dimensional kernels, so hard-coded + dimension X. */ + hsa_op_immed *imm + = new hsa_op_immed (dimension, (BrigKind16_t) BRIG_TYPE_U32); + hsa_insn_basic *insn = new hsa_insn_basic (2, opcode, BRIG_TYPE_U32, NULL, + imm); + hbb->append_insn (insn); + insn->set_output_in_type (dest, 0, hbb); +} + +/* Generate a special HSA-related instruction for gimple STMT. + Instructions are appended to basic block HBB. */ + +static void +query_hsa_grid (gimple *stmt, BrigOpcode16_t opcode, int dimension, + hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt)); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + + query_hsa_grid (dest, opcode, dimension, hbb); +} + +/* Emit instructions that set hsa_num_threads according to provided VALUE. + Instructions are appended to basic block HBB. */ + +static void +gen_set_num_threads (tree value, hsa_bb *hbb) +{ + hbb->append_insn (new hsa_insn_comment ("omp_set_num_threads")); + hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb); + + src = src->get_in_type (hsa_num_threads->m_type, hbb); + hsa_op_address *addr = new hsa_op_address (hsa_num_threads); + + hsa_insn_basic *basic + = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, src, addr); + hbb->append_insn (basic); +} + +static GTY (()) tree hsa_kernel_dispatch_type = NULL; + +/* Return byte offset of a FIELD_NAME in GOMP_hsa_kernel_dispatch which + is defined in plugin-hsa.c. 
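+   In C terms, the record built below corresponds roughly to the following
+   layout (which must be kept in sync with libgomp):
+
+     struct GOMP_hsa_kernel_dispatch
+     {
+       void *queue, *omp_data_memory, *kernarg_address;
+       uint64_t object, signal;
+       uint32_t private_segment_size, group_segment_size;
+       uint64_t kernel_dispatch_count, debug, omp_level;
+       void *children_dispatches;
+       uint32_t omp_num_threads;
+     };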
*/ + +static HOST_WIDE_INT +get_hsa_kernel_dispatch_offset (const char *field_name) +{ + if (hsa_kernel_dispatch_type == NULL) + { + /* Collection of information needed for a dispatch of a kernel from a + kernel. Keep in sync with libgomp's plugin-hsa.c. */ + + hsa_kernel_dispatch_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("queue"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_data_memory"), + ptr_type_node); + DECL_CHAIN (id_f2) = id_f1; + tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernarg_address"), + ptr_type_node); + DECL_CHAIN (id_f3) = id_f2; + tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("object"), + uint64_type_node); + DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("signal"), + uint64_type_node); + DECL_CHAIN (id_f5) = id_f4; + tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("private_segment_size"), + uint32_type_node); + DECL_CHAIN (id_f6) = id_f5; + tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("group_segment_size"), + uint32_type_node); + DECL_CHAIN (id_f7) = id_f6; + tree id_f8 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dispatch_count"), + uint64_type_node); + DECL_CHAIN (id_f8) = id_f7; + tree id_f9 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("debug"), + uint64_type_node); + DECL_CHAIN (id_f9) = id_f8; + tree id_f10 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_level"), + uint64_type_node); + DECL_CHAIN (id_f10) = id_f9; + tree id_f11 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("children_dispatches"), + ptr_type_node); + DECL_CHAIN (id_f11) = id_f10; + tree id_f12 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_num_threads"), + uint32_type_node); + DECL_CHAIN (id_f12) = id_f11; + + + finish_builtin_struct (hsa_kernel_dispatch_type, "__hsa_kernel_dispatch", + id_f12, NULL_TREE); + TYPE_ARTIFICIAL (hsa_kernel_dispatch_type) = 1; + } + + for (tree chain = TYPE_FIELDS (hsa_kernel_dispatch_type); + chain != NULL_TREE; chain = TREE_CHAIN (chain)) + if (strcmp (field_name, IDENTIFIER_POINTER (DECL_NAME (chain))) == 0) + return int_byte_position (chain); + + gcc_unreachable (); +} + +/* Return an HSA register that will contain number of threads for + a future dispatched kernel. Instructions are added to HBB. */ + +static hsa_op_reg * +gen_num_threads_for_dispatch (hsa_bb *hbb) +{ + /* Step 1) Assign to number of threads: + MIN (HSA_DEFAULT_NUM_THREADS, hsa_num_threads). */ + hsa_op_reg *threads = new hsa_op_reg (hsa_num_threads->m_type); + hsa_op_address *addr = new hsa_op_address (hsa_num_threads); + + hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, threads->m_type, + threads, addr)); + + hsa_op_immed *limit = new hsa_op_immed (HSA_DEFAULT_NUM_THREADS, + BRIG_TYPE_U32); + hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1); + hsa_insn_cmp * cmp + = new hsa_insn_cmp (BRIG_COMPARE_LT, r->m_type, r, threads, limit); + hbb->append_insn (cmp); + + BrigType16_t btype = hsa_bittype_for_type (threads->m_type); + hsa_op_reg *tmp = new hsa_op_reg (threads->m_type); + + hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp, r, + threads, limit)); + + /* Step 2) If the number is equal to zero, + return shadow->omp_num_threads. 
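+     In C terms the whole sequence is:
+       t = MIN (HSA_DEFAULT_NUM_THREADS, hsa_num_threads);
+       if (t == 0) t = shadow->omp_num_threads;
+     with the result truncated to 16 bits for the dispatch.  */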
+  hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
+
+  hsa_op_reg *shadow_thread_count = new hsa_op_reg (BRIG_TYPE_U32);
+  addr
+    = new hsa_op_address (shadow_reg_ptr,
+			  get_hsa_kernel_dispatch_offset ("omp_num_threads"));
+  hsa_insn_basic *basic
+    = new hsa_insn_mem (BRIG_OPCODE_LD, shadow_thread_count->m_type,
+			shadow_thread_count, addr);
+  hbb->append_insn (basic);
+
+  hsa_op_reg *tmp2 = new hsa_op_reg (threads->m_type);
+  r = new hsa_op_reg (BRIG_TYPE_B1);
+  hsa_op_immed *imm = new hsa_op_immed (0, shadow_thread_count->m_type);
+  hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_EQ, r->m_type, r, tmp, imm));
+  hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp2, r,
+					shadow_thread_count, tmp));
+
+  hsa_op_base *dest = tmp2->get_in_type (BRIG_TYPE_U16, hbb);
+
+  return as_a <hsa_op_reg *> (dest);
+}
+
+
+/* Emit instructions that assign the number of teams to the lhs of gimple
+   STMT.  Instructions are appended to basic block HBB.  */
+
+static void
+gen_get_num_teams (gimple *stmt, hsa_bb *hbb)
+{
+  if (gimple_call_lhs (stmt) == NULL_TREE)
+    return;
+
+  hbb->append_insn (new hsa_insn_comment ("omp_get_num_teams"));
+
+  tree lhs = gimple_call_lhs (stmt);
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+  hsa_op_immed *one = new hsa_op_immed (1, dest->m_type);
+
+  hsa_insn_basic *basic
+    = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, dest, one);
+
+  hbb->append_insn (basic);
+}
+
+/* Emit instructions that assign a team number to the lhs of gimple STMT.
+   Instructions are appended to basic block HBB.  */
+
+static void
+gen_get_team_num (gimple *stmt, hsa_bb *hbb)
+{
+  if (gimple_call_lhs (stmt) == NULL_TREE)
+    return;
+
+  hbb->append_insn (new hsa_insn_comment ("omp_get_team_num"));
+
+  tree lhs = gimple_call_lhs (stmt);
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+  hsa_op_immed *zero = new hsa_op_immed (0, dest->m_type);
+
+  hsa_insn_basic *basic
+    = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, dest, zero);
+
+  hbb->append_insn (basic);
+}
+
+/* Emit instructions that load the levels-var ICV into the lhs of gimple
+   STMT.  Instructions are appended to basic block HBB.  */
+
+static void
+gen_get_level (gimple *stmt, hsa_bb *hbb)
+{
+  if (gimple_call_lhs (stmt) == NULL_TREE)
+    return;
+
+  hbb->append_insn (new hsa_insn_comment ("omp_get_level"));
+
+  tree lhs = gimple_call_lhs (stmt);
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+
+  hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
+  if (shadow_reg_ptr == NULL)
+    {
+      HSA_SORRY_AT (gimple_location (stmt),
+		    "support for HSA does not implement omp_get_level called "
+		    "from a function not being inlined within a kernel");
+      return;
+    }
+
+  hsa_op_address *addr
+    = new hsa_op_address (shadow_reg_ptr,
+			  get_hsa_kernel_dispatch_offset ("omp_level"));
+
+  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64,
+					(hsa_op_base *) NULL, addr);
+  hbb->append_insn (mem);
+  mem->set_output_in_type (dest, 0, hbb);
+}
+
+/* Emit instructions that implement omp_get_max_threads of gimple STMT.  */
+
+static void
+gen_get_max_threads (gimple *stmt, hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  if (!lhs)
+    return;
+
+  hbb->append_insn (new hsa_insn_comment ("omp_get_max_threads"));
+
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+  hsa_op_with_type *num_threads_reg = gen_num_threads_for_dispatch (hbb)
+    ->get_in_type (dest->m_type, hbb);
+  hsa_build_append_simple_mov (dest, num_threads_reg, hbb);
+}
+
+/* Emit instructions that implement the alloca builtin gimple STMT.
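+   The memory is allocated in the private segment and the resulting address
+   is converted to a flat address with an stof operation, because gimple
+   pointers are flat.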
+   Instructions are appended to basic block HBB.  */
+
+static void
+gen_hsa_alloca (gcall *call, hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (call);
+  if (lhs == NULL_TREE)
+    return;
+
+  built_in_function fn = DECL_FUNCTION_CODE (gimple_call_fndecl (call));
+
+  gcc_checking_assert (fn == BUILT_IN_ALLOCA
+		       || fn == BUILT_IN_ALLOCA_WITH_ALIGN);
+
+  unsigned bit_alignment = 0;
+
+  if (fn == BUILT_IN_ALLOCA_WITH_ALIGN)
+    {
+      tree alignment_tree = gimple_call_arg (call, 1);
+      if (TREE_CODE (alignment_tree) != INTEGER_CST)
+	{
+	  HSA_SORRY_ATV (gimple_location (call),
+			 "support for HSA does not implement "
+			 "__builtin_alloca_with_align with a non-constant "
+			 "alignment: %E", alignment_tree);
+	  return;
+	}
+
+      bit_alignment = tree_to_uhwi (alignment_tree);
+    }
+
+  tree rhs1 = gimple_call_arg (call, 0);
+  hsa_op_with_type *size = hsa_reg_or_immed_for_gimple_op (rhs1, hbb)
+    ->get_in_type (BRIG_TYPE_U32, hbb);
+  hsa_op_with_type *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+
+  hsa_op_reg *tmp
+    = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE));
+  hsa_insn_alloca *a = new hsa_insn_alloca (tmp, size, bit_alignment);
+  hbb->append_insn (a);
+
+  hsa_insn_seg *seg
+    = new hsa_insn_seg (BRIG_OPCODE_STOF,
+			hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT),
+			tmp->m_type, BRIG_SEGMENT_PRIVATE, dest, tmp);
+  hbb->append_insn (seg);
+}
+
+/* Emit instructions that implement the clrsb builtin STMT:
+   Returns the number of leading redundant sign bits in x, i.e. the number
+   of bits following the most significant bit that are identical to it.
+   There are no special cases for 0 or other values.
+   Instructions are appended to basic block HBB.  */
+
+static void
+gen_hsa_clrsb (gcall *call, hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (call);
+  if (lhs == NULL_TREE)
+    return;
+
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+  tree rhs1 = gimple_call_arg (call, 0);
+  hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
+  BrigType16_t bittype = hsa_bittype_for_type (arg->m_type);
+  unsigned bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs1)));
+  gcc_checking_assert (bitsize >= 32);
+
+  /* Set MOST_SIG to true if the most significant bit is set to one.  */
+  hsa_op_immed *c
+    = new hsa_op_immed ((unsigned HOST_WIDE_INT) 1 << (bitsize - 1),
+			hsa_uint_for_bitsize (bitsize));
+
+  hsa_op_reg *and_reg = new hsa_op_reg (bittype);
+  gen_hsa_binary_operation (BRIG_OPCODE_AND, and_reg, arg, c, hbb);
+
+  hsa_op_reg *most_sign = new hsa_op_reg (BRIG_TYPE_B1);
+  hsa_insn_cmp *cmp
+    = new hsa_insn_cmp (BRIG_COMPARE_EQ, most_sign->m_type, most_sign,
+			and_reg, c);
+  hbb->append_insn (cmp);
+
+  /* If the most significant bit is one, negate the input.  Otherwise
+     shift the input value left by one bit.  */
+  hsa_op_reg *arg_neg = new hsa_op_reg (arg->m_type);
+  gen_hsa_unary_operation (BRIG_OPCODE_NEG, arg_neg, arg, hbb);
+
+  hsa_op_reg *shifted_arg = new hsa_op_reg (arg->m_type);
+  gen_hsa_binary_operation (BRIG_OPCODE_SHL, shifted_arg, arg,
+			    new hsa_op_immed (1, BRIG_TYPE_U64), hbb);
+
+  /* Assign the value that can be used for the FIRSTBIT instruction
+     according to the most significant bit.  */
+  hsa_op_reg *tmp = new hsa_op_reg (bittype);
+  hsa_insn_basic *cmov
+    = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, bittype, tmp, most_sign,
+			  arg_neg, shifted_arg);
+  hbb->append_insn (cmov);
+
+  hsa_op_reg *leading_bits = new hsa_op_reg (BRIG_TYPE_S32);
+  gen_hsa_unary_operation (BRIG_OPCODE_FIRSTBIT, leading_bits,
+			   tmp->get_in_type (hsa_uint_for_bitsize (bitsize),
+					     hbb), hbb);
+
+  /* Set a flag if the input value is equal to zero.
*/ + hsa_op_reg *is_zero = new hsa_op_reg (BRIG_TYPE_B1); + cmp = new hsa_insn_cmp (BRIG_COMPARE_EQ, is_zero->m_type, is_zero, arg, + new hsa_op_immed (0, arg->m_type)); + hbb->append_insn (cmp); + + /* Return the number of leading bits, or 31 if the input value is zero. */ + cmov = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, BRIG_TYPE_B32, NULL, is_zero, + new hsa_op_immed (31, BRIG_TYPE_U32), + leading_bits->get_in_type (BRIG_TYPE_B32, hbb)); + hbb->append_insn (cmov); + cmov->set_output_in_type (dest, 0, hbb); +} + +/* Emit instructions that implement ffs builtin STMT: + Returns one plus the index of the least significant 1-bit of x, + or if x is zero, returns zero. + Instructions are appended to basic block HBB. */ + +static void +gen_hsa_ffs (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + hsa_op_reg *tmp = new hsa_op_reg (BRIG_TYPE_U32); + hsa_insn_srctype *insn = new hsa_insn_srctype (2, BRIG_OPCODE_LASTBIT, + tmp->m_type, arg->m_type, + tmp, arg); + hbb->append_insn (insn); + + hsa_insn_basic *addition + = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, NULL, tmp, + new hsa_op_immed (1, tmp->m_type)); + hbb->append_insn (addition); + addition->set_output_in_type (dest, 0, hbb); +} + +static void +gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb) +{ + gcc_checking_assert (hsa_type_integer_p (arg->m_type)); + + if (hsa_type_bit_size (arg->m_type) < 32) + arg = arg->get_in_type (BRIG_TYPE_B32, hbb); + + if (!hsa_btype_p (arg->m_type)) + arg = arg->get_in_type (hsa_bittype_for_type (arg->m_type), hbb); + + hsa_insn_srctype *popcount + = new hsa_insn_srctype (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32, + arg->m_type, NULL, arg); + hbb->append_insn (popcount); + popcount->set_output_in_type (dest, 0, hbb); +} + +/* Emit instructions that implement parity builtin STMT: + Returns the parity of x, i.e. the number of 1-bits in x modulo 2. + Instructions are appended to basic block HBB. */ + +static void +gen_hsa_parity (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + hsa_op_reg *popcount = new hsa_op_reg (BRIG_TYPE_U32); + gen_hsa_popcount_to_dest (popcount, arg, hbb); + + hsa_insn_basic *insn + = new hsa_insn_basic (3, BRIG_OPCODE_REM, popcount->m_type, NULL, popcount, + new hsa_op_immed (2, popcount->m_type)); + hbb->append_insn (insn); + insn->set_output_in_type (dest, 0, hbb); +} + +/* Emit instructions that implement popcount builtin STMT. + Instructions are appended to basic block HBB. */ + +static void +gen_hsa_popcount (gcall *call, hsa_bb *hbb) +{ + tree lhs = gimple_call_lhs (call); + if (lhs == NULL_TREE) + return; + + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + tree rhs1 = gimple_call_arg (call, 0); + hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); + + gen_hsa_popcount_to_dest (dest, arg, hbb); +} + +/* Set VALUE to a shadow kernel debug argument and append a new instruction + to HBB basic block. 
+   */
+
+static void
+set_debug_value (hsa_bb *hbb, hsa_op_with_type *value)
+{
+  hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg ();
+  if (shadow_reg_ptr == NULL)
+    return;
+
+  hsa_op_address *addr
+    = new hsa_op_address (shadow_reg_ptr,
+			  get_hsa_kernel_dispatch_offset ("debug"));
+  hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, BRIG_TYPE_U64, value,
+					addr);
+  hbb->append_insn (mem);
+}
+
+void
+omp_simple_builtin::generate (gimple *stmt, hsa_bb *hbb)
+{
+  if (m_sorry)
+    {
+      if (m_warning_message)
+	HSA_SORRY_AT (gimple_location (stmt), m_warning_message);
+      else
+	HSA_SORRY_ATV (gimple_location (stmt),
+		       "support for HSA does not implement calls to %s",
+		       m_name);
+    }
+  else if (m_warning_message != NULL)
+    warning_at (gimple_location (stmt), OPT_Whsa, m_warning_message);
+
+  if (m_return_value != NULL)
+    {
+      tree lhs = gimple_call_lhs (stmt);
+      if (!lhs)
+	return;
+
+      hbb->append_insn (new hsa_insn_comment (m_name));
+
+      hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+      hsa_op_with_type *op = m_return_value->get_in_type (dest->m_type, hbb);
+      hsa_build_append_simple_mov (dest, op, hbb);
+    }
+}
+
+/* If STMT is a call of a known library function, generate code to perform
+   it and return true.  */
+
+static bool
+gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb)
+{
+  bool handled = false;
+  const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt));
+
+  char *copy = NULL;
+  size_t len = strlen (name);
+  if (len > 0 && name[len - 1] == '_')
+    {
+      copy = XNEWVEC (char, len + 1);
+      strcpy (copy, name);
+      copy[len - 1] = '\0';
+      name = copy;
+    }
+
+  /* Handle omp_* routines.  */
+  if (strstr (name, "omp_") == name)
+    {
+      hsa_init_simple_builtins ();
+      omp_simple_builtin *builtin = omp_simple_builtins->get (name);
+      if (builtin)
+	{
+	  builtin->generate (stmt, hbb);
+	  free (copy);
+	  return true;
+	}
+
+      handled = true;
+      if (strcmp (name, "omp_set_num_threads") == 0)
+	gen_set_num_threads (gimple_call_arg (stmt, 0), hbb);
+      else if (strcmp (name, "omp_get_thread_num") == 0)
+	{
+	  hbb->append_insn (new hsa_insn_comment (name));
+	  query_hsa_grid (stmt, BRIG_OPCODE_WORKITEMABSID, 0, hbb);
+	}
+      else if (strcmp (name, "omp_get_num_threads") == 0)
+	{
+	  hbb->append_insn (new hsa_insn_comment (name));
+	  query_hsa_grid (stmt, BRIG_OPCODE_GRIDSIZE, 0, hbb);
+	}
+      else if (strcmp (name, "omp_get_num_teams") == 0)
+	gen_get_num_teams (stmt, hbb);
+      else if (strcmp (name, "omp_get_team_num") == 0)
+	gen_get_team_num (stmt, hbb);
+      else if (strcmp (name, "omp_get_level") == 0)
+	gen_get_level (stmt, hbb);
+      else if (strcmp (name, "omp_get_active_level") == 0)
+	gen_get_level (stmt, hbb);
+      else if (strcmp (name, "omp_in_parallel") == 0)
+	gen_get_level (stmt, hbb);
+      else if (strcmp (name, "omp_get_max_threads") == 0)
+	gen_get_max_threads (stmt, hbb);
+      else
+	handled = false;
+
+      if (handled)
+	{
+	  free (copy);
+	  return true;
+	}
+    }
+
+  if (strcmp (name, "__hsa_set_debug_value") == 0)
+    {
+      handled = true;
+      if (hsa_cfun->has_shadow_reg_p ())
+	{
+	  tree rhs1 = gimple_call_arg (stmt, 0);
+	  hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb);
+
+	  src = src->get_in_type (BRIG_TYPE_U64, hbb);
+	  set_debug_value (hbb, src);
+	}
+    }
+
+  free (copy);
+  return handled;
+}
+
+/* Helper function to create a single unary HSA operation out of calls to
+   builtins.  OPCODE is the HSA operation to be generated.  STMT is a gimple
+   call to a builtin.  HBB is the HSA BB to which the instruction should be
+   added.
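+   E.g. a call to __builtin_sqrtf is emitted via this helper as a single
+   HSAIL sqrt instruction applied to the first call argument.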
+   Note that nothing will be created if STMT does not have a LHS.  */
+
+static void
+gen_hsa_unaryop_for_builtin (BrigOpcode opcode, gimple *stmt, hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  if (!lhs)
+    return;
+  hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+  hsa_op_with_type *op
+    = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb);
+  gen_hsa_unary_operation (opcode, dest, op, hbb);
+}
+
+/* Helper function to create a call to a standard library function if the
+   LHS of STMT is used.  HBB is the HSA BB to which the instruction should
+   be added.  */
+
+static void
+gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  if (!lhs)
+    return;
+
+  if (gimple_call_internal_p (stmt))
+    gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
+  else
+    gen_hsa_insns_for_direct_call (stmt, hbb);
+}
+
+/* Helper function to create a single unary HSA operation out of calls to
+   builtins (if unsafe math optimizations are enabled).  Otherwise, create
+   a call to the standard library function.
+   OPCODE is the HSA operation to be generated.  STMT is a gimple
+   call to a builtin.  HBB is the HSA BB to which the instruction should be
+   added.  Note that nothing will be created if STMT does not have a LHS.  */
+
+static void
+gen_hsa_unaryop_or_call_for_builtin (BrigOpcode opcode, gimple *stmt,
+				     hsa_bb *hbb)
+{
+  if (flag_unsafe_math_optimizations)
+    gen_hsa_unaryop_for_builtin (opcode, stmt, hbb);
+  else
+    gen_hsa_unaryop_builtin_call (stmt, hbb);
+}
+
+/* Generate an HSA address corresponding to a value VAL (as opposed to a
+   memory reference tree), for example an SSA_NAME or an ADDR_EXPR.  HBB is
+   the HSA BB to which the instruction should be added.  */
+
+static hsa_op_address *
+get_address_from_value (tree val, hsa_bb *hbb)
+{
+  switch (TREE_CODE (val))
+    {
+    case SSA_NAME:
+      {
+	BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT);
+	hsa_op_base *reg
+	  = hsa_cfun->reg_for_gimple_ssa (val)->get_in_type (addrtype, hbb);
+	return new hsa_op_address (NULL, as_a <hsa_op_reg *> (reg), 0);
+      }
+    case ADDR_EXPR:
+      return gen_hsa_addr (TREE_OPERAND (val, 0), hbb);
+
+    case INTEGER_CST:
+      if (tree_fits_shwi_p (val))
+	return new hsa_op_address (NULL, NULL, tree_to_shwi (val));
+      /* Otherwise fall through.  */
+
+    default:
+      HSA_SORRY_ATV (EXPR_LOCATION (val),
+		     "support for HSA does not implement memory access to %E",
+		     val);
+      return new hsa_op_address (NULL, NULL, 0);
+    }
+}
+
+/* Return the string representation of memory order MEMMODEL.  */
+
+static const char *
+get_memory_order_name (unsigned memmodel)
+{
+  switch (memmodel)
+    {
+    case __ATOMIC_RELAXED:
+      return "__ATOMIC_RELAXED";
+    case __ATOMIC_CONSUME:
+      return "__ATOMIC_CONSUME";
+    case __ATOMIC_ACQUIRE:
+      return "__ATOMIC_ACQUIRE";
+    case __ATOMIC_RELEASE:
+      return "__ATOMIC_RELEASE";
+    case __ATOMIC_ACQ_REL:
+      return "__ATOMIC_ACQ_REL";
+    case __ATOMIC_SEQ_CST:
+      return "__ATOMIC_SEQ_CST";
+    default:
+      return NULL;
+    }
+}
+
+/* Return the HSA memory order corresponding to a predefined __atomic memory
+   model constant MEMMODEL.  LOCATION is provided to locate the problematic
+   statement.
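+   Only __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __ATOMIC_RELEASE and
+   __ATOMIC_ACQ_REL have direct HSAIL counterparts; __ATOMIC_CONSUME and
+   __ATOMIC_SEQ_CST are reported as unimplemented.  */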
+
+static BrigMemoryOrder
+get_memory_order (unsigned memmodel, location_t location)
+{
+  switch (memmodel)
+    {
+    case __ATOMIC_RELAXED:
+      return BRIG_MEMORY_ORDER_RELAXED;
+    case __ATOMIC_ACQUIRE:
+      return BRIG_MEMORY_ORDER_SC_ACQUIRE;
+    case __ATOMIC_RELEASE:
+      return BRIG_MEMORY_ORDER_SC_RELEASE;
+    case __ATOMIC_ACQ_REL:
+      return BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE;
+    default:
+      HSA_SORRY_ATV (location,
+		     "support for HSA does not implement memory model: %s",
+		     get_memory_order_name (memmodel));
+      return BRIG_MEMORY_ORDER_NONE;
+    }
+}
+
+/* Helper function to create an HSA atomic binary operation instruction out
+   of calls to atomic builtins.  RET_ORIG is true if the built-in is the
+   variant that returns the value before applying the operation, and false
+   if it should return the value after applying the operation (if it returns
+   a value at all).  ACODE is the atomic operation code, STMT is a gimple
+   call to a builtin.  HBB is the HSA BB to which the instruction should be
+   added.  */
+
+static void
+gen_hsa_ternary_atomic_for_builtin (bool ret_orig,
+				    enum BrigAtomicOperation acode,
+				    gimple *stmt,
+				    hsa_bb *hbb)
+{
+  tree lhs = gimple_call_lhs (stmt);
+
+  tree type = TREE_TYPE (gimple_call_arg (stmt, 1));
+  BrigType16_t hsa_type = hsa_type_for_scalar_tree_type (type, false);
+  BrigType16_t mtype = mem_type_for_type (hsa_type);
+  tree model = gimple_call_arg (stmt, 2);
+
+  if (!tree_fits_uhwi_p (model))
+    {
+      HSA_SORRY_ATV (gimple_location (stmt),
+		     "support for HSA does not implement memory model %E",
+		     model);
+      return;
+    }
+
+  unsigned HOST_WIDE_INT mmodel = tree_to_uhwi (model);
+
+  BrigMemoryOrder memorder = get_memory_order (mmodel, gimple_location (stmt));
+
+  /* Certain atomic insns must have Bx memory types.  */
+  switch (acode)
+    {
+    case BRIG_ATOMIC_LD:
+    case BRIG_ATOMIC_ST:
+    case BRIG_ATOMIC_AND:
+    case BRIG_ATOMIC_OR:
+    case BRIG_ATOMIC_XOR:
+    case BRIG_ATOMIC_EXCH:
+      mtype = hsa_bittype_for_type (mtype);
+      break;
+    default:
+      break;
+    }
+
+  hsa_op_reg *dest;
+  int nops, opcode;
+  if (lhs)
+    {
+      if (ret_orig)
+	dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+      else
+	dest = new hsa_op_reg (hsa_type);
+      opcode = BRIG_OPCODE_ATOMIC;
+      nops = 3;
+    }
+  else
+    {
+      dest = NULL;
+      opcode = BRIG_OPCODE_ATOMICNORET;
+      nops = 2;
+    }
+
+  if (acode == BRIG_ATOMIC_ST && memorder != BRIG_MEMORY_ORDER_RELAXED
+      && memorder != BRIG_MEMORY_ORDER_SC_RELEASE)
+    {
+      HSA_SORRY_ATV (gimple_location (stmt),
+		     "support for HSA does not implement memory model for "
+		     "ATOMIC_ST: %s", get_memory_order_name (mmodel));
+      return;
+    }
+
+  hsa_insn_atomic *atominsn = new hsa_insn_atomic (nops, opcode, acode, mtype,
+						   memorder);
+
+  hsa_op_address *addr;
+  addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
+  /* TODO: Warn if addr has private segment, because the finalizer will not
+     accept that (and it does not make much sense).  */
+  hsa_op_base *op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1),
+						    hbb);
+
+  if (lhs)
+    {
+      atominsn->set_op (0, dest);
+      atominsn->set_op (1, addr);
+      atominsn->set_op (2, op);
+    }
+  else
+    {
+      atominsn->set_op (0, addr);
+      atominsn->set_op (1, op);
+    }
+
+  hbb->append_insn (atominsn);
+
+  /* HSA does not natively support the variants that return the modified
+     value, so redo the operation non-atomically if that is what was
+     requested.
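+     E.g. __atomic_fetch_add can use the atomic instruction's result
+     directly, whereas __atomic_add_fetch takes the original value returned
+     by the atomic instruction and repeats the addition ordinarily to
+     produce the post-operation value.  */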
*/
+  if (lhs && !ret_orig)
+    {
+      int arith;
+      switch (acode)
+        {
+        case BRIG_ATOMIC_ADD:
+          arith = BRIG_OPCODE_ADD;
+          break;
+        case BRIG_ATOMIC_AND:
+          arith = BRIG_OPCODE_AND;
+          break;
+        case BRIG_ATOMIC_OR:
+          arith = BRIG_OPCODE_OR;
+          break;
+        case BRIG_ATOMIC_SUB:
+          arith = BRIG_OPCODE_SUB;
+          break;
+        case BRIG_ATOMIC_XOR:
+          arith = BRIG_OPCODE_XOR;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      hsa_op_reg *real_dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+      gen_hsa_binary_operation (arith, real_dest, dest, op, hbb);
+    }
+}
+
+/* Generate HSA instructions for an internal fn.
+   Instructions will be appended to HBB, which also needs to be the HSA BB
+   corresponding to the basic_block of STMT.  */
+
+static void
+gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb)
+{
+  gcc_checking_assert (gimple_call_internal_fn (stmt));
+  internal_fn fn = gimple_call_internal_fn (stmt);
+
+  bool is_float_type_p = false;
+  if (gimple_call_lhs (stmt) != NULL
+      && TREE_TYPE (gimple_call_lhs (stmt)) == float_type_node)
+    is_float_type_p = true;
+
+  switch (fn)
+    {
+    case IFN_CEIL:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb);
+      break;
+
+    case IFN_FLOOR:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb);
+      break;
+
+    case IFN_RINT:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb);
+      break;
+
+    case IFN_SQRT:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb);
+      break;
+
+    case IFN_TRUNC:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb);
+      break;
+
+    case IFN_COS:
+      if (is_float_type_p)
+        gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
+      else
+        gen_hsa_unaryop_builtin_call (stmt, hbb);
+      break;
+
+    case IFN_EXP2:
+      if (is_float_type_p)
+        gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
+      else
+        gen_hsa_unaryop_builtin_call (stmt, hbb);
+      break;
+
+    case IFN_LOG2:
+      if (is_float_type_p)
+        gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
+      else
+        gen_hsa_unaryop_builtin_call (stmt, hbb);
+      break;
+
+    case IFN_SIN:
+      if (is_float_type_p)
+        gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
+      else
+        gen_hsa_unaryop_builtin_call (stmt, hbb);
+      break;
+
+    case IFN_CLRSB:
+      gen_hsa_clrsb (stmt, hbb);
+      break;
+
+    case IFN_CLZ:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
+      break;
+
+    case IFN_CTZ:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
+      break;
+
+    case IFN_FFS:
+      gen_hsa_ffs (stmt, hbb);
+      break;
+
+    case IFN_PARITY:
+      gen_hsa_parity (stmt, hbb);
+      break;
+
+    case IFN_POPCOUNT:
+      gen_hsa_popcount (stmt, hbb);
+      break;
+
+    case IFN_ACOS:
+    case IFN_ASIN:
+    case IFN_ATAN:
+    case IFN_EXP:
+    case IFN_EXP10:
+    case IFN_EXPM1:
+    case IFN_LOG:
+    case IFN_LOG10:
+    case IFN_LOG1P:
+    case IFN_LOGB:
+    case IFN_SIGNIFICAND:
+    case IFN_TAN:
+    case IFN_NEARBYINT:
+    case IFN_ROUND:
+    case IFN_ATAN2:
+    case IFN_COPYSIGN:
+    case IFN_FMOD:
+    case IFN_POW:
+    case IFN_REMAINDER:
+    case IFN_SCALB:
+    case IFN_FMIN:
+    case IFN_FMAX:
+      gen_hsa_insns_for_call_of_internal_fn (stmt, hbb);
+      break;
+
+    default:
+      HSA_SORRY_ATV (gimple_location (stmt),
+                     "support for HSA does not implement internal function: %s",
+                     internal_fn_name (fn));
+      break;
+    }
+}
+
+#define HSA_MEMORY_BUILTINS_LIMIT 128
+
+/* Generate HSA instructions for the given call statement STMT.  Instructions
+   will be appended to HBB.
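+   For example (an illustrative addition), a gimple call such as
+
+     _1 = __builtin_sqrtf (x_2);
+
+   is handled by the BUILT_IN_SQRTF case below and becomes a single HSAIL
+   sqrt instruction instead of a call, whereas builtins with no HSAIL
+   counterpart are emitted as direct calls by the default case.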
*/ + +static void +gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) +{ + gcall *call = as_a <gcall *> (stmt); + tree lhs = gimple_call_lhs (stmt); + hsa_op_reg *dest; + + if (gimple_call_internal_p (stmt)) + { + gen_hsa_insn_for_internal_fn_call (call, hbb); + return; + } + + if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) + { + tree function_decl = gimple_call_fndecl (stmt); + if (function_decl == NULL_TREE) + { + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement indirect calls"); + return; + } + + if (hsa_callable_function_p (function_decl)) + gen_hsa_insns_for_direct_call (stmt, hbb); + else if (!gen_hsa_insns_for_known_library_call (stmt, hbb)) + HSA_SORRY_AT (gimple_location (stmt), + "HSA supports only calls of functions marked with pragma " + "omp declare target"); + return; + } + + tree fndecl = gimple_call_fndecl (stmt); + enum built_in_function builtin = DECL_FUNCTION_CODE (fndecl); + switch (builtin) + { + case BUILT_IN_FABS: + case BUILT_IN_FABSF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_ABS, stmt, hbb); + break; + + case BUILT_IN_CEIL: + case BUILT_IN_CEILF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb); + break; + + case BUILT_IN_FLOOR: + case BUILT_IN_FLOORF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb); + break; + + case BUILT_IN_RINT: + case BUILT_IN_RINTF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb); + break; + + case BUILT_IN_SQRT: + case BUILT_IN_SQRTF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb); + break; + + case BUILT_IN_TRUNC: + case BUILT_IN_TRUNCF: + gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb); + break; + + case BUILT_IN_COS: + case BUILT_IN_SIN: + case BUILT_IN_EXP2: + case BUILT_IN_LOG2: + /* HSAIL does not provide an instruction for double argument type. 
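+         The native ncos, nsin, nexp2 and nlog2 operations exist for f32
+         only, so the double variants of these builtins must remain library
+         calls, while the f32 variants are handled just below.  (A note
+         added for clarity, not part of the original comment.)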
*/
+      gen_hsa_unaryop_builtin_call (stmt, hbb);
+      break;
+
+    case BUILT_IN_COSF:
+      gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb);
+      break;
+
+    case BUILT_IN_EXP2F:
+      gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb);
+      break;
+
+    case BUILT_IN_LOG2F:
+      gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb);
+      break;
+
+    case BUILT_IN_SINF:
+      gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb);
+      break;
+
+    case BUILT_IN_CLRSB:
+    case BUILT_IN_CLRSBL:
+    case BUILT_IN_CLRSBLL:
+      gen_hsa_clrsb (call, hbb);
+      break;
+
+    case BUILT_IN_CLZ:
+    case BUILT_IN_CLZL:
+    case BUILT_IN_CLZLL:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb);
+      break;
+
+    case BUILT_IN_CTZ:
+    case BUILT_IN_CTZL:
+    case BUILT_IN_CTZLL:
+      gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb);
+      break;
+
+    case BUILT_IN_FFS:
+    case BUILT_IN_FFSL:
+    case BUILT_IN_FFSLL:
+      gen_hsa_ffs (call, hbb);
+      break;
+
+    case BUILT_IN_PARITY:
+    case BUILT_IN_PARITYL:
+    case BUILT_IN_PARITYLL:
+      gen_hsa_parity (call, hbb);
+      break;
+
+    case BUILT_IN_POPCOUNT:
+    case BUILT_IN_POPCOUNTL:
+    case BUILT_IN_POPCOUNTLL:
+      gen_hsa_popcount (call, hbb);
+      break;
+
+    case BUILT_IN_ATOMIC_LOAD_1:
+    case BUILT_IN_ATOMIC_LOAD_2:
+    case BUILT_IN_ATOMIC_LOAD_4:
+    case BUILT_IN_ATOMIC_LOAD_8:
+    case BUILT_IN_ATOMIC_LOAD_16:
+      {
+        BrigType16_t mtype;
+        hsa_op_address *addr;
+        addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb);
+        tree model = gimple_call_arg (stmt, 1);
+        if (!tree_fits_uhwi_p (model))
+          {
+            HSA_SORRY_ATV (gimple_location (stmt),
+                           "support for HSA does not implement "
+                           "memory model: %E",
+                           model);
+            return;
+          }
+
+        unsigned HOST_WIDE_INT mmodel = tree_to_uhwi (model);
+        BrigMemoryOrder memorder = get_memory_order (mmodel,
+                                                     gimple_location (stmt));
+
+        if (memorder != BRIG_MEMORY_ORDER_RELAXED
+            && memorder != BRIG_MEMORY_ORDER_SC_ACQUIRE)
+          {
+            HSA_SORRY_ATV (gimple_location (stmt),
+                           "support for HSA does not implement "
+                           "memory model for ATOMIC_LD: %s",
+                           get_memory_order_name (mmodel));
+            return;
+          }
+
+        if (lhs)
+          {
+            BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs),
+                                                            false);
+            mtype = mem_type_for_type (t);
+            mtype = hsa_bittype_for_type (mtype);
+            dest = hsa_cfun->reg_for_gimple_ssa (lhs);
+          }
+        else
+          {
+            mtype = BRIG_TYPE_B64;
+            dest = new hsa_op_reg (mtype);
+          }
+
+        hsa_insn_atomic *atominsn
+          = new hsa_insn_atomic (2, BRIG_OPCODE_ATOMIC, BRIG_ATOMIC_LD, mtype,
+                                 memorder, dest, addr);
+
+        hbb->append_insn (atominsn);
+        break;
+      }
+
+    case BUILT_IN_ATOMIC_EXCHANGE_1:
+    case BUILT_IN_ATOMIC_EXCHANGE_2:
+    case BUILT_IN_ATOMIC_EXCHANGE_4:
+    case BUILT_IN_ATOMIC_EXCHANGE_8:
+    case BUILT_IN_ATOMIC_EXCHANGE_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_EXCH, stmt, hbb);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_ADD_1:
+    case BUILT_IN_ATOMIC_FETCH_ADD_2:
+    case BUILT_IN_ATOMIC_FETCH_ADD_4:
+    case BUILT_IN_ATOMIC_FETCH_ADD_8:
+    case BUILT_IN_ATOMIC_FETCH_ADD_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_ADD, stmt, hbb);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_SUB_1:
+    case BUILT_IN_ATOMIC_FETCH_SUB_2:
+    case BUILT_IN_ATOMIC_FETCH_SUB_4:
+    case BUILT_IN_ATOMIC_FETCH_SUB_8:
+    case BUILT_IN_ATOMIC_FETCH_SUB_16:
+      gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_SUB, stmt, hbb);
+      break;
+
+    case BUILT_IN_ATOMIC_FETCH_AND_1:
+    case BUILT_IN_ATOMIC_FETCH_AND_2:
+    case BUILT_IN_ATOMIC_FETCH_AND_4:
+    case BUILT_IN_ATOMIC_FETCH_AND_8:
+    case BUILT_IN_ATOMIC_FETCH_AND_16:
+
gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_AND, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_FETCH_XOR_1: + case BUILT_IN_ATOMIC_FETCH_XOR_2: + case BUILT_IN_ATOMIC_FETCH_XOR_4: + case BUILT_IN_ATOMIC_FETCH_XOR_8: + case BUILT_IN_ATOMIC_FETCH_XOR_16: + gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_XOR, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_FETCH_OR_1: + case BUILT_IN_ATOMIC_FETCH_OR_2: + case BUILT_IN_ATOMIC_FETCH_OR_4: + case BUILT_IN_ATOMIC_FETCH_OR_8: + case BUILT_IN_ATOMIC_FETCH_OR_16: + gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_OR, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_STORE_1: + case BUILT_IN_ATOMIC_STORE_2: + case BUILT_IN_ATOMIC_STORE_4: + case BUILT_IN_ATOMIC_STORE_8: + case BUILT_IN_ATOMIC_STORE_16: + /* Since there cannot be any LHS, the first parameter is meaningless. */ + gen_hsa_ternary_atomic_for_builtin (true, BRIG_ATOMIC_ST, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_ADD_FETCH_1: + case BUILT_IN_ATOMIC_ADD_FETCH_2: + case BUILT_IN_ATOMIC_ADD_FETCH_4: + case BUILT_IN_ATOMIC_ADD_FETCH_8: + case BUILT_IN_ATOMIC_ADD_FETCH_16: + gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_ADD, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_SUB_FETCH_1: + case BUILT_IN_ATOMIC_SUB_FETCH_2: + case BUILT_IN_ATOMIC_SUB_FETCH_4: + case BUILT_IN_ATOMIC_SUB_FETCH_8: + case BUILT_IN_ATOMIC_SUB_FETCH_16: + gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_SUB, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_AND_FETCH_1: + case BUILT_IN_ATOMIC_AND_FETCH_2: + case BUILT_IN_ATOMIC_AND_FETCH_4: + case BUILT_IN_ATOMIC_AND_FETCH_8: + case BUILT_IN_ATOMIC_AND_FETCH_16: + gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_AND, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_XOR_FETCH_1: + case BUILT_IN_ATOMIC_XOR_FETCH_2: + case BUILT_IN_ATOMIC_XOR_FETCH_4: + case BUILT_IN_ATOMIC_XOR_FETCH_8: + case BUILT_IN_ATOMIC_XOR_FETCH_16: + gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_XOR, stmt, hbb); + break; + + case BUILT_IN_ATOMIC_OR_FETCH_1: + case BUILT_IN_ATOMIC_OR_FETCH_2: + case BUILT_IN_ATOMIC_OR_FETCH_4: + case BUILT_IN_ATOMIC_OR_FETCH_8: + case BUILT_IN_ATOMIC_OR_FETCH_16: + gen_hsa_ternary_atomic_for_builtin (false, BRIG_ATOMIC_OR, stmt, hbb); + break; + + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1: + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2: + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4: + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8: + case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_16: + { + /* TODO: Use the appropriate memory model for now. */ + tree type = TREE_TYPE (gimple_call_arg (stmt, 1)); + + BrigType16_t atype + = hsa_bittype_for_type (hsa_type_for_scalar_tree_type (type, false)); + + hsa_insn_atomic *atominsn + = new hsa_insn_atomic (4, BRIG_OPCODE_ATOMIC, BRIG_ATOMIC_CAS, atype, + BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE); + hsa_op_address *addr; + addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb); + + if (lhs != NULL) + dest = hsa_cfun->reg_for_gimple_ssa (lhs); + else + dest = new hsa_op_reg (atype); + + /* Should check what the memory scope is. 
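+           As an illustrative note (not in the original sources): the
+           operand order set below mirrors the builtin, i.e. for
+
+             __sync_val_compare_and_swap (&x, old, new)
+
+           operand 1 is the address of x, operand 2 the expected value OLD,
+           operand 3 the replacement value NEW, and operand 0 receives the
+           original memory contents.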
*/ + atominsn->m_memoryscope = BRIG_MEMORY_SCOPE_WORKGROUP; + atominsn->set_op (0, dest); + atominsn->set_op (1, addr); + + hsa_op_with_type *op + = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb); + atominsn->set_op (2, op); + op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 2), hbb); + atominsn->set_op (3, op); + + hbb->append_insn (atominsn); + break; + } + case BUILT_IN_GOMP_PARALLEL: + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement non-gridified " + "OpenMP parallel constructs."); + break; + case BUILT_IN_OMP_GET_THREAD_NUM: + { + query_hsa_grid (stmt, BRIG_OPCODE_WORKITEMABSID, 0, hbb); + break; + } + + case BUILT_IN_OMP_GET_NUM_THREADS: + { + query_hsa_grid (stmt, BRIG_OPCODE_GRIDSIZE, 0, hbb); + break; + } + case BUILT_IN_GOMP_TEAMS: + { + gen_set_num_threads (gimple_call_arg (stmt, 1), hbb); + break; + } + case BUILT_IN_OMP_GET_NUM_TEAMS: + { + gen_get_num_teams (stmt, hbb); + break; + } + case BUILT_IN_OMP_GET_TEAM_NUM: + { + gen_get_team_num (stmt, hbb); + break; + } + case BUILT_IN_MEMCPY: + case BUILT_IN_MEMPCPY: + { + tree byte_size = gimple_call_arg (stmt, 2); + + if (!tree_fits_uhwi_p (byte_size)) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + unsigned n = tree_to_uhwi (byte_size); + + if (n > HSA_MEMORY_BUILTINS_LIMIT) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + tree dst = gimple_call_arg (stmt, 0); + tree src = gimple_call_arg (stmt, 1); + + hsa_op_address *dst_addr = get_address_from_value (dst, hbb); + hsa_op_address *src_addr = get_address_from_value (src, hbb); + + gen_hsa_memory_copy (hbb, dst_addr, src_addr, n); + + tree lhs = gimple_call_lhs (stmt); + if (lhs) + { + hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs); + hsa_op_with_type *dst_reg = hsa_reg_or_immed_for_gimple_op (dst, + hbb); + hsa_op_with_type *tmp; + + if (builtin == BUILT_IN_MEMPCPY) + { + tmp = new hsa_op_reg (dst_reg->m_type); + hsa_insn_basic *add + = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, + tmp, dst_reg, + new hsa_op_immed (n, dst_reg->m_type)); + hbb->append_insn (add); + } + else + tmp = dst_reg; + + hsa_build_append_simple_mov (lhs_reg, tmp, hbb); + } + + break; + } + case BUILT_IN_MEMSET: + { + tree dst = gimple_call_arg (stmt, 0); + tree c = gimple_call_arg (stmt, 1); + + if (TREE_CODE (c) != INTEGER_CST) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + tree byte_size = gimple_call_arg (stmt, 2); + + if (!tree_fits_uhwi_p (byte_size)) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + unsigned n = tree_to_uhwi (byte_size); + + if (n > HSA_MEMORY_BUILTINS_LIMIT) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + hsa_op_address *dst_addr; + dst_addr = get_address_from_value (dst, hbb); + unsigned HOST_WIDE_INT constant + = tree_to_uhwi (fold_convert (unsigned_char_type_node, c)); + + gen_hsa_memory_set (hbb, dst_addr, constant, n); + + tree lhs = gimple_call_lhs (stmt); + if (lhs) + gen_hsa_insns_for_single_assignment (lhs, dst, hbb); + + break; + } + case BUILT_IN_BZERO: + { + tree dst = gimple_call_arg (stmt, 0); + tree byte_size = gimple_call_arg (stmt, 1); + + if (!tree_fits_uhwi_p (byte_size)) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + unsigned n = tree_to_uhwi (byte_size); + + if (n > HSA_MEMORY_BUILTINS_LIMIT) + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + + hsa_op_address *dst_addr; + dst_addr = get_address_from_value (dst, hbb); + + gen_hsa_memory_set (hbb, dst_addr, 0, n); 
+ + break; + } + case BUILT_IN_ALLOCA: + case BUILT_IN_ALLOCA_WITH_ALIGN: + { + gen_hsa_alloca (call, hbb); + break; + } + default: + { + gen_hsa_insns_for_direct_call (stmt, hbb); + return; + } + } +} + +/* Generate HSA instructions for a given gimple statement. Instructions will be + appended to HBB. */ + +static void +gen_hsa_insns_for_gimple_stmt (gimple *stmt, hsa_bb *hbb) +{ + switch (gimple_code (stmt)) + { + case GIMPLE_ASSIGN: + if (gimple_clobber_p (stmt)) + break; + + if (gimple_assign_single_p (stmt)) + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + gen_hsa_insns_for_single_assignment (lhs, rhs, hbb); + } + else + gen_hsa_insns_for_operation_assignment (stmt, hbb); + break; + case GIMPLE_RETURN: + gen_hsa_insns_for_return (as_a <greturn *> (stmt), hbb); + break; + case GIMPLE_COND: + gen_hsa_insns_for_cond_stmt (stmt, hbb); + break; + case GIMPLE_CALL: + gen_hsa_insns_for_call (stmt, hbb); + break; + case GIMPLE_DEBUG: + /* ??? HSA supports some debug facilities. */ + break; + case GIMPLE_LABEL: + { + tree label = gimple_label_label (as_a <glabel *> (stmt)); + if (FORCED_LABEL (label)) + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement gimple label with " + "address taken"); + + break; + } + case GIMPLE_NOP: + { + hbb->append_insn (new hsa_insn_basic (0, BRIG_OPCODE_NOP)); + break; + } + case GIMPLE_SWITCH: + { + gen_hsa_insns_for_switch_stmt (as_a <gswitch *> (stmt), hbb); + break; + } + default: + HSA_SORRY_ATV (gimple_location (stmt), + "support for HSA does not implement gimple statement %s", + gimple_code_name[(int) gimple_code (stmt)]); + } +} + +/* Generate a HSA PHI from a gimple PHI. */ + +static void +gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb) +{ + hsa_insn_phi *hphi; + unsigned count = gimple_phi_num_args (phi_stmt); + + hsa_op_reg *dest + = hsa_cfun->reg_for_gimple_ssa (gimple_phi_result (phi_stmt)); + hphi = new hsa_insn_phi (count, dest); + hphi->m_bb = hbb->m_bb; + + tree lhs = gimple_phi_result (phi_stmt); + + for (unsigned i = 0; i < count; i++) + { + tree op = gimple_phi_arg_def (phi_stmt, i); + + if (TREE_CODE (op) == SSA_NAME) + { + hsa_op_reg *hreg = hsa_cfun->reg_for_gimple_ssa (op); + hphi->set_op (i, hreg); + } + else + { + gcc_assert (is_gimple_min_invariant (op)); + tree t = TREE_TYPE (op); + if (!POINTER_TYPE_P (t) + || (TREE_CODE (op) == STRING_CST + && TREE_CODE (TREE_TYPE (t)) == INTEGER_TYPE)) + hphi->set_op (i, new hsa_op_immed (op)); + else if (POINTER_TYPE_P (TREE_TYPE (lhs)) + && TREE_CODE (op) == INTEGER_CST) + { + /* Handle assignment of NULL value to a pointer type. 
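+                 A gimple PHI such as
+
+                   ptr_1 = PHI <0B(2), ptr_2(3)>
+
+                 reaches this point with the integer constant 0 as the
+                 argument coming from basic block 2 (an illustrative example,
+                 not from the original comment).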
*/ + hphi->set_op (i, new hsa_op_immed (op)); + } + else if (TREE_CODE (op) == ADDR_EXPR) + { + edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i); + hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e)); + hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0), + hbb_src); + + hsa_op_reg *dest = new hsa_op_reg (BRIG_TYPE_U64); + hsa_insn_basic *insn + = new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64, + dest, addr); + hbb_src->append_insn (insn); + + hphi->set_op (i, dest); + } + else + { + HSA_SORRY_AT (gimple_location (phi_stmt), + "support for HSA does not handle PHI nodes with " + "constant address operands"); + return; + } + } + } + + hphi->m_prev = hbb->m_last_phi; + hphi->m_next = NULL; + if (hbb->m_last_phi) + hbb->m_last_phi->m_next = hphi; + hbb->m_last_phi = hphi; + if (!hbb->m_first_phi) + hbb->m_first_phi = hphi; +} + +/* Constructor of class containing HSA-specific information about a basic + block. CFG_BB is the CFG BB this HSA BB is associated with. IDX is the new + index of this BB (so that the constructor does not attempt to use + hsa_cfun during its construction). */ + +hsa_bb::hsa_bb (basic_block cfg_bb, int idx) + : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL), + m_last_phi (NULL), m_index (idx), m_liveout (BITMAP_ALLOC (NULL)), + m_livein (BITMAP_ALLOC (NULL)) +{ + gcc_assert (!cfg_bb->aux); + cfg_bb->aux = this; +} + +/* Constructor of class containing HSA-specific information about a basic + block. CFG_BB is the CFG BB this HSA BB is associated with. */ + +hsa_bb::hsa_bb (basic_block cfg_bb) + : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL), + m_last_phi (NULL), m_index (hsa_cfun->m_hbb_count++), + m_liveout (BITMAP_ALLOC (NULL)), m_livein (BITMAP_ALLOC (NULL)) +{ + gcc_assert (!cfg_bb->aux); + cfg_bb->aux = this; +} + +/* Destructor of class representing HSA BB. */ + +hsa_bb::~hsa_bb () +{ + BITMAP_FREE (m_livein); + BITMAP_FREE (m_liveout); +} + +/* Create and initialize and return a new hsa_bb structure for a given CFG + basic block BB. */ + +hsa_bb * +hsa_init_new_bb (basic_block bb) +{ + return new (*hsa_allocp_bb) hsa_bb (bb); +} + +/* Initialize OMP in an HSA basic block PROLOGUE. */ + +static void +init_prologue (void) +{ + if (!hsa_cfun->m_kern_p) + return; + + hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + /* Create a magic number that is going to be printed by libgomp. */ + unsigned index = hsa_get_number_decl_kernel_mappings (); + + /* Emit store to debug argument. */ + if (PARAM_VALUE (PARAM_HSA_GEN_DEBUG_STORES) > 0) + set_debug_value (prologue, new hsa_op_immed (1000 + index, BRIG_TYPE_U64)); +} + +/* Initialize hsa_num_threads to a default value. */ + +static void +init_hsa_num_threads (void) +{ + hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + /* Save the default value to private variable hsa_num_threads. */ + hsa_insn_basic *basic + = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, + new hsa_op_immed (0, hsa_num_threads->m_type), + new hsa_op_address (hsa_num_threads)); + prologue->append_insn (basic); +} + +/* Go over gimple representation and generate our internal HSA one. */ + +static void +gen_body_from_gimple () +{ + basic_block bb; + + /* Verify CFG for complex edges we are unable to handle. */ + edge_iterator ei; + edge e; + + FOR_EACH_BB_FN (bb, cfun) + { + FOR_EACH_EDGE (e, ei, bb->succs) + { + /* Verify all unsupported flags for edges that point + to the same basic block. 
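+             (In practice only EDGE_EH is rejected here, because HSAIL has
+             no way to represent exception handling.)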
*/ + if (e->flags & EDGE_EH) + { + HSA_SORRY_AT (UNKNOWN_LOCATION, + "support for HSA does not implement exception " + "handling"); + return; + } + } + } + + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + hsa_bb *hbb = hsa_bb_for_bb (bb); + if (hbb) + continue; + + hbb = hsa_init_new_bb (bb); + + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gen_hsa_insns_for_gimple_stmt (gsi_stmt (gsi), hbb); + if (hsa_seen_error ()) + return; + } + } + + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + hsa_bb *hbb = hsa_bb_for_bb (bb); + gcc_assert (hbb != NULL); + + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!virtual_operand_p (gimple_phi_result (gsi_stmt (gsi)))) + gen_hsa_phi_from_gimple_phi (gsi_stmt (gsi), hbb); + } + + if (dump_file) + { + fprintf (dump_file, "------- Generated SSA form -------\n"); + dump_hsa_cfun (dump_file); + } +} + +static void +gen_function_decl_parameters (hsa_function_representation *f, + tree decl) +{ + tree parm; + unsigned i; + + for (parm = TYPE_ARG_TYPES (TREE_TYPE (decl)), i = 0; + parm; + parm = TREE_CHAIN (parm), i++) + { + /* Result type if last in the tree list. */ + if (TREE_CHAIN (parm) == NULL) + break; + + tree v = TREE_VALUE (parm); + + hsa_symbol *arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, + BRIG_LINKAGE_NONE); + arg->m_type = hsa_type_for_tree_type (v, &arg->m_dim); + arg->m_name_number = i; + + f->m_input_args.safe_push (arg); + } + + tree result_type = TREE_TYPE (TREE_TYPE (decl)); + if (!VOID_TYPE_P (result_type)) + { + f->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, + BRIG_LINKAGE_NONE); + f->m_output_arg->m_type + = hsa_type_for_tree_type (result_type, &f->m_output_arg->m_dim); + f->m_output_arg->m_name = "res"; + } +} + +/* Generate the vector of parameters of the HSA representation of the current + function. This also includes the output parameter representing the + result. */ + +static void +gen_function_def_parameters () +{ + tree parm; + + hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + + for (parm = DECL_ARGUMENTS (cfun->decl); parm; + parm = DECL_CHAIN (parm)) + { + struct hsa_symbol **slot; + + hsa_symbol *arg + = new hsa_symbol (BRIG_TYPE_NONE, hsa_cfun->m_kern_p + ? BRIG_SEGMENT_KERNARG : BRIG_SEGMENT_ARG, + BRIG_LINKAGE_FUNCTION); + arg->fillup_for_decl (parm); + + hsa_cfun->m_input_args.safe_push (arg); + + if (hsa_seen_error ()) + return; + + arg->m_name = hsa_get_declaration_name (parm); + + /* Copy all input arguments and create corresponding private symbols + for them. 
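+     This matters mainly for kernels, whose formal arguments live in the
+     read-only kernarg segment: if the source takes the address of a
+     parameter or modifies it, e.g.
+
+       void foo (int n) { int *p = &n; ... }
+
+     the parameter gets a private copy that the body can address and write.
+     (An illustrative example added here; FOO is hypothetical, and the copy
+     is only made for addressable or modified non-register parameters, as
+     the condition below shows.)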
*/
+      hsa_symbol *private_arg;
+      hsa_op_address *parm_addr = new hsa_op_address (arg);
+
+      if (TREE_ADDRESSABLE (parm)
+          || (!is_gimple_reg (parm) && !TREE_READONLY (parm)))
+        {
+          private_arg = hsa_cfun->create_hsa_temporary (arg->m_type);
+          private_arg->fillup_for_decl (parm);
+
+          hsa_op_address *private_arg_addr = new hsa_op_address (private_arg);
+          gen_hsa_memory_copy (prologue, private_arg_addr, parm_addr,
+                               arg->total_byte_size ());
+        }
+      else
+        private_arg = arg;
+
+      slot = hsa_cfun->m_local_symbols->find_slot (private_arg, INSERT);
+      gcc_assert (!*slot);
+      *slot = private_arg;
+
+      if (is_gimple_reg (parm))
+        {
+          tree ddef = ssa_default_def (cfun, parm);
+          if (ddef && !has_zero_uses (ddef))
+            {
+              BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (ddef),
+                                                              false);
+              BrigType16_t mtype = mem_type_for_type (t);
+              hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (ddef);
+              hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype,
+                                                    dest, parm_addr);
+              gcc_assert (!parm_addr->m_reg);
+              prologue->append_insn (mem);
+            }
+        }
+    }
+
+  if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (cfun->decl))))
+    {
+      struct hsa_symbol **slot;
+
+      hsa_cfun->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG,
+                                               BRIG_LINKAGE_FUNCTION);
+      hsa_cfun->m_output_arg->fillup_for_decl (DECL_RESULT (cfun->decl));
+
+      if (hsa_seen_error ())
+        return;
+
+      hsa_cfun->m_output_arg->m_name = "res";
+      slot = hsa_cfun->m_local_symbols->find_slot (hsa_cfun->m_output_arg,
+                                                   INSERT);
+      gcc_assert (!*slot);
+      *slot = hsa_cfun->m_output_arg;
+    }
+}
+
+/* Generate a function representation that corresponds to
+   a function declaration.  */
+
+hsa_function_representation *
+hsa_generate_function_declaration (tree decl)
+{
+  hsa_function_representation *fun
+    = new hsa_function_representation (decl, false, 0);
+
+  fun->m_declaration_p = true;
+  fun->m_name = get_brig_function_name (decl);
+  gen_function_decl_parameters (fun, decl);
+
+  return fun;
+}
+
+
+/* Generate a function representation that corresponds to
+   an internal FN.  */
+
+hsa_function_representation *
+hsa_generate_internal_fn_decl (hsa_internal_fn *fn)
+{
+  hsa_function_representation *fun = new hsa_function_representation (fn);
+
+  fun->m_name = fn->name ();
+
+  for (unsigned i = 0; i < fn->get_arity (); i++)
+    {
+      hsa_symbol *arg
+        = new hsa_symbol (fn->get_argument_type (i), BRIG_SEGMENT_ARG,
+                          BRIG_LINKAGE_NONE);
+      arg->m_name_number = i;
+      fun->m_input_args.safe_push (arg);
+    }
+
+  fun->m_output_arg = new hsa_symbol (fn->get_argument_type (-1),
+                                      BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE);
+  fun->m_output_arg->m_name = "res";
+
+  return fun;
+}
+
+/* Return true if switch statement S can be transformed
+   into an SBR instruction in HSAIL.  */
+
+static bool
+transformable_switch_to_sbr_p (gswitch *s)
+{
+  /* Identify if a switch statement can be transformed into
+     an SBR instruction, like:
+
+     sbr_u32 $s1 [@label1, @label2, @label3];
+  */
+
+  tree size = get_switch_size (s);
+  if (!tree_fits_uhwi_p (size))
+    return false;
+
+  if (tree_to_uhwi (size) > HSA_MAXIMUM_SBR_LABELS)
+    return false;
+
+  return true;
+}
+
+/* Structure holding the connection between PHI nodes and the immediate
+   values held by those nodes.  */
+
+struct phi_definition
+{
+  phi_definition (unsigned phi_i, unsigned label_i, tree imm):
+    phi_index (phi_i), label_index (label_i), phi_value (imm)
+  {}
+
+  unsigned phi_index;
+  unsigned label_index;
+  tree phi_value;
+};
+
+/* Sum a slice of vector V, starting at index START and ending
+   at index END - 1.
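+   For instance, given V = {2, 3, 5, 7}, sum_slice (v, 1, 3) yields
+   3 + 5 = 8 (an added example).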
*/
+
+template <typename T>
+static
+T sum_slice (const auto_vec <T> &v, unsigned start, unsigned end)
+{
+  T s = 0;
+
+  for (unsigned i = start; i < end; i++)
+    s += v[i];
+
+  return s;
+}
+
+/* Transform GIMPLE SWITCH statements into series of IF statements.
+   Consider the following example:
+
+L0:
+   switch (index)
+     case C1:
+L1:    hard_work_1 ();
+       break;
+     case C2..C3:
+L2:    hard_work_2 ();
+       break;
+     default:
+LD:    hard_work_3 ();
+       break;
+
+   The transformation encompasses the following steps:
+   1) all immediate values used by edges coming from the switch basic block
+      are saved
+   2) all these edges are removed
+   3) the switch statement (in L0) is replaced by:
+        if (index == C1)
+          goto L1;
+        else
+          goto L1';
+
+   4) a newly created basic block Lx' is used for the generation of
+      the next condition
+   5) the else branch of the last condition goes to LD
+   6) all immediate values in PHI nodes that were propagated through
+      the edges removed in step 2 are fixed up
+
+   Note: if a case is given by a range C1..C2, the following transformation
+   is used:
+
+     switch_cond_op1 = C1 <= index;
+     switch_cond_op2 = index <= C2;
+     switch_cond_and = switch_cond_op1 & switch_cond_op2;
+     if (switch_cond_and != 0)
+       goto Lx;
+     else
+       goto Ly;
+
+*/
+
+static void
+convert_switch_statements ()
+{
+  function *func = DECL_STRUCT_FUNCTION (current_function_decl);
+  basic_block bb;
+
+  bool need_update = false;
+
+  FOR_EACH_BB_FN (bb, func)
+    {
+      gimple_stmt_iterator gsi = gsi_last_bb (bb);
+      if (gsi_end_p (gsi))
+        continue;
+
+      gimple *stmt = gsi_stmt (gsi);
+
+      if (gimple_code (stmt) == GIMPLE_SWITCH)
+        {
+          gswitch *s = as_a <gswitch *> (stmt);
+
+          /* If the switch can utilize an SBR insn, skip the statement.  */
+          if (transformable_switch_to_sbr_p (s))
+            continue;
+
+          need_update = true;
+
+          unsigned labels = gimple_switch_num_labels (s);
+          tree index = gimple_switch_index (s);
+          tree index_type = TREE_TYPE (index);
+          tree default_label = gimple_switch_default_label (s);
+          basic_block default_label_bb
+            = label_to_block_fn (func, CASE_LABEL (default_label));
+          basic_block cur_bb = bb;
+
+          auto_vec <edge> new_edges;
+          auto_vec <phi_definition *> phi_todo_list;
+          auto_vec <gcov_type> edge_counts;
+          auto_vec <int> edge_probabilities;
+
+          /* Collect all case labels, and the PHI nodes on the edges leading
+             to them, which will have to be fixed up after the new collection
+             of edges is added.  */
+          for (unsigned i = 0; i < labels; i++)
+            {
+              tree label = gimple_switch_label (s, i);
+              basic_block label_bb
+                = label_to_block_fn (func, CASE_LABEL (label));
+              edge e = find_edge (bb, label_bb);
+              edge_counts.safe_push (e->count);
+              edge_probabilities.safe_push (e->probability);
+              gphi_iterator phi_gsi;
+
+              /* Save PHI definitions that will be destroyed because an edge
+                 is going to be removed.  */
+              unsigned phi_index = 0;
+              for (phi_gsi = gsi_start_phis (e->dest);
+                   !gsi_end_p (phi_gsi); gsi_next (&phi_gsi))
+                {
+                  gphi *phi = phi_gsi.phi ();
+                  for (unsigned j = 0; j < gimple_phi_num_args (phi); j++)
+                    {
+                      if (gimple_phi_arg_edge (phi, j) == e)
+                        {
+                          tree imm = gimple_phi_arg_def (phi, j);
+                          phi_definition *p = new phi_definition (phi_index, i,
+                                                                  imm);
+                          phi_todo_list.safe_push (p);
+                          break;
+                        }
+                    }
+                  phi_index++;
+                }
+            }
+
+          /* Remove all edges from the current basic block.  */
+          for (int i = EDGE_COUNT (bb->succs) - 1; i >= 0; i--)
+            {
+              edge e = EDGE_SUCC (bb, i);
+              remove_edge (e);
+            }
+
+          /* Iterate over all non-default labels.  */
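+          /* Label index 0 is the default label, which is why the loop below
+             starts at 1; the default destination instead becomes the
+             fall-through target of the last generated condition.  */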
*/ + for (unsigned i = 1; i < labels; i++) + { + tree label = gimple_switch_label (s, i); + tree low = CASE_LOW (label); + tree high = CASE_HIGH (label); + + if (!useless_type_conversion_p (TREE_TYPE (low), index_type)) + low = fold_convert (index_type, low); + + gimple_stmt_iterator cond_gsi = gsi_last_bb (cur_bb); + gimple *c = NULL; + if (high) + { + tree tmp1 = make_temp_ssa_name (boolean_type_node, NULL, + "switch_cond_op1"); + + gimple *assign1 = gimple_build_assign (tmp1, LE_EXPR, low, + index); + + tree tmp2 = make_temp_ssa_name (boolean_type_node, NULL, + "switch_cond_op2"); + + if (!useless_type_conversion_p (TREE_TYPE (high), index_type)) + high = fold_convert (index_type, high); + gimple *assign2 = gimple_build_assign (tmp2, LE_EXPR, index, + high); + + tree tmp3 = make_temp_ssa_name (boolean_type_node, NULL, + "switch_cond_and"); + gimple *assign3 = gimple_build_assign (tmp3, BIT_AND_EXPR, tmp1, + tmp2); + + gsi_insert_before (&cond_gsi, assign1, GSI_SAME_STMT); + gsi_insert_before (&cond_gsi, assign2, GSI_SAME_STMT); + gsi_insert_before (&cond_gsi, assign3, GSI_SAME_STMT); + + tree b = constant_boolean_node (false, boolean_type_node); + c = gimple_build_cond (NE_EXPR, tmp3, b, NULL, NULL); + } + else + c = gimple_build_cond (EQ_EXPR, index, low, NULL, NULL); + + gimple_set_location (c, gimple_location (stmt)); + + gsi_insert_before (&cond_gsi, c, GSI_SAME_STMT); + + basic_block label_bb + = label_to_block_fn (func, CASE_LABEL (label)); + edge new_edge = make_edge (cur_bb, label_bb, EDGE_TRUE_VALUE); + int prob_sum = sum_slice <int> (edge_probabilities, i, labels) + + edge_probabilities[0]; + + if (prob_sum) + new_edge->probability + = RDIV (REG_BR_PROB_BASE * edge_probabilities[i], prob_sum); + + new_edge->count = edge_counts[i]; + new_edges.safe_push (new_edge); + + if (i < labels - 1) + { + /* Prepare another basic block that will contain + next condition. */ + basic_block next_bb = create_empty_bb (cur_bb); + if (current_loops) + { + add_bb_to_loop (next_bb, cur_bb->loop_father); + loops_state_set (LOOPS_NEED_FIXUP); + } + + edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); + next_edge->probability + = inverse_probability (new_edge->probability); + next_edge->count = edge_counts[0] + + sum_slice <gcov_type> (edge_counts, i, labels); + next_bb->frequency = EDGE_FREQUENCY (next_edge); + cur_bb = next_bb; + } + else /* Link last IF statement and default label + of the switch. */ + { + edge e = make_edge (cur_bb, default_label_bb, EDGE_FALSE_VALUE); + e->probability = inverse_probability (new_edge->probability); + e->count = edge_counts[0]; + new_edges.safe_insert (0, e); + } + } + + /* Restore original PHI immediate value. */ + for (unsigned i = 0; i < phi_todo_list.length (); i++) + { + phi_definition *phi_def = phi_todo_list[i]; + edge new_edge = new_edges[phi_def->label_index]; + + gphi_iterator it = gsi_start_phis (new_edge->dest); + for (unsigned i = 0; i < phi_def->phi_index; i++) + gsi_next (&it); + + gphi *phi = it.phi (); + add_phi_arg (phi, phi_def->phi_value, new_edge, UNKNOWN_LOCATION); + delete phi_def; + } + + /* Remove the original GIMPLE switch statement. */ + gsi_remove (&gsi, true); + } + } + + if (dump_file) + dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS); + + if (need_update) + { + free_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + } +} + +/* Expand builtins that can't be handled by HSA back-end. 
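+   For example, the complex exponential builtins handled below are
+   decomposed via the identity cexpi (z) = cos (z) + i * sin (z), leaving
+   only real-valued math calls that the back-end knows how to emit.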
*/
+
+static void
+expand_builtins ()
+{
+  function *func = DECL_STRUCT_FUNCTION (current_function_decl);
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, func)
+    {
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+           gsi_next (&gsi))
+        {
+          gimple *stmt = gsi_stmt (gsi);
+
+          if (gimple_code (stmt) != GIMPLE_CALL)
+            continue;
+
+          gcall *call = as_a <gcall *> (stmt);
+
+          if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL))
+            continue;
+
+          tree fndecl = gimple_call_fndecl (stmt);
+          enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
+          switch (fn)
+            {
+            case BUILT_IN_CEXPF:
+            case BUILT_IN_CEXPIF:
+            case BUILT_IN_CEXPI:
+              {
+                /* Similar to builtins.c (expand_builtin_cexpi), the builtin
+                   can be transformed to:
+                   cexp(I * z) = ccos(z) + I * csin(z).  */
+                tree lhs = gimple_call_lhs (stmt);
+                tree rhs = gimple_call_arg (stmt, 0);
+                tree rhs_type = TREE_TYPE (rhs);
+                bool float_type_p = rhs_type == float_type_node;
+                tree real_part = make_temp_ssa_name (rhs_type, NULL,
+                                                     "cexp_real_part");
+                tree imag_part = make_temp_ssa_name (rhs_type, NULL,
+                                                     "cexp_imag_part");
+
+                tree cos_fndecl
+                  = mathfn_built_in (rhs_type, float_type_p
+                                     ? BUILT_IN_COSF : BUILT_IN_COS);
+                gcall *cos = gimple_build_call (cos_fndecl, 1, rhs);
+                gimple_call_set_lhs (cos, real_part);
+                gsi_insert_before (&gsi, cos, GSI_SAME_STMT);
+
+                tree sin_fndecl
+                  = mathfn_built_in (rhs_type, float_type_p
+                                     ? BUILT_IN_SINF : BUILT_IN_SIN);
+                gcall *sin = gimple_build_call (sin_fndecl, 1, rhs);
+                gimple_call_set_lhs (sin, imag_part);
+                gsi_insert_before (&gsi, sin, GSI_SAME_STMT);
+
+                gassign *assign = gimple_build_assign (lhs, COMPLEX_EXPR,
+                                                       real_part, imag_part);
+                gsi_insert_before (&gsi, assign, GSI_SAME_STMT);
+                gsi_remove (&gsi, true);
+
+                break;
+              }
+            default:
+              break;
+            }
+        }
+    }
+}
+
+/* Emit HSA module variables that are global for the entire module.  */
+
+static void
+emit_hsa_module_variables (void)
+{
+  hsa_num_threads = new hsa_symbol (BRIG_TYPE_U32, BRIG_SEGMENT_PRIVATE,
+                                    BRIG_LINKAGE_MODULE, true);
+
+  hsa_num_threads->m_name = "hsa_num_threads";
+
+  hsa_brig_emit_omp_symbols ();
+}
+
+/* Generate the HSAIL representation of the current function and write it into
+   a special section of the output file.  If KERNEL is set, the function will
+   be considered an HSA kernel callable from the host, otherwise it will be
+   compiled as an HSA function callable from other HSA code.  */
+
+static void
+generate_hsa (bool kernel)
+{
+  hsa_init_data_for_cfun ();
+
+  if (hsa_num_threads == NULL)
+    emit_hsa_module_variables ();
+
+  /* Initialize hsa_cfun.
*/ + hsa_cfun = new hsa_function_representation (cfun->decl, kernel, + SSANAMES (cfun)->length ()); + hsa_cfun->init_extra_bbs (); + + if (flag_tm) + { + HSA_SORRY_AT (UNKNOWN_LOCATION, + "support for HSA does not implement transactional memory"); + goto fail; + } + + verify_function_arguments (cfun->decl); + if (hsa_seen_error ()) + goto fail; + + hsa_cfun->m_name = get_brig_function_name (cfun->decl); + + gen_function_def_parameters (); + if (hsa_seen_error ()) + goto fail; + + init_prologue (); + + gen_body_from_gimple (); + if (hsa_seen_error ()) + goto fail; + + if (hsa_cfun->m_kernel_dispatch_count) + init_hsa_num_threads (); + + if (hsa_cfun->m_kern_p) + { + hsa_function_summary *s + = hsa_summaries->get (cgraph_node::get (hsa_cfun->m_decl)); + hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name, + hsa_cfun->m_maximum_omp_data_size, + s->m_gridified_kernel_p); + } + +#ifdef ENABLE_CHECKING + for (unsigned i = 0; i < hsa_cfun->m_ssa_map.length (); i++) + if (hsa_cfun->m_ssa_map[i]) + hsa_cfun->m_ssa_map[i]->verify_ssa (); + + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + hsa_bb *hbb = hsa_bb_for_bb (bb); + + for (hsa_insn_basic *insn = hbb->m_first_insn; insn; insn = insn->m_next) + insn->verify (); + } + +#endif + + hsa_regalloc (); + hsa_brig_emit_function (); + + fail: + hsa_deinit_data_for_cfun (); +} + +namespace { + +const pass_data pass_data_gen_hsail = +{ + GIMPLE_PASS, + "hsagen", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_cfg | PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0 /* todo_flags_finish */ +}; + +class pass_gen_hsail : public gimple_opt_pass +{ +public: + pass_gen_hsail (gcc::context *ctxt) + : gimple_opt_pass(pass_data_gen_hsail, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *); + unsigned int execute (function *); + +}; // class pass_gen_hsail + +/* Determine whether or not to run generation of HSAIL. */ + +bool +pass_gen_hsail::gate (function *f) +{ + return hsa_gen_requested_p () + && hsa_gpu_implementation_p (f->decl); +} + +unsigned int +pass_gen_hsail::execute (function *) +{ + hsa_function_summary *s + = hsa_summaries->get (cgraph_node::get_create (current_function_decl)); + + convert_switch_statements (); + expand_builtins (); + generate_hsa (s->m_kind == HSA_KERNEL); + TREE_ASM_WRITTEN (current_function_decl) = 1; + return TODO_discard_function; +} + +} // anon namespace + +/* Create the instance of hsa gen pass. */ + +gimple_opt_pass * +make_pass_gen_hsail (gcc::context *ctxt) +{ + return new pass_gen_hsail (ctxt); +} diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c new file mode 100644 index 0000000..f8e83ecf --- /dev/null +++ b/gcc/hsa-regalloc.c @@ -0,0 +1,719 @@ +/* HSAIL IL Register allocation and out-of-SSA. + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Michael Matz <matz@suse.de> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. 
If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "is-a.h"
+#include "vec.h"
+#include "tree.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "function.h"
+#include "bitmap.h"
+#include "dumpfile.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
+#include "hsa.h"
+
+
+/* Process the PHI node PHI as a part of naive out-of-SSA.  */
+
+static void
+naive_process_phi (hsa_insn_phi *phi)
+{
+  unsigned count = phi->operand_count ();
+  for (unsigned i = 0; i < count; i++)
+    {
+      gcc_checking_assert (phi->get_op (i));
+      hsa_op_base *op = phi->get_op (i);
+      hsa_bb *hbb;
+      edge e;
+
+      if (!op)
+        break;
+
+      e = EDGE_PRED (phi->m_bb, i);
+      if (single_succ_p (e->src))
+        hbb = hsa_bb_for_bb (e->src);
+      else
+        {
+          basic_block old_dest = e->dest;
+          hbb = hsa_init_new_bb (split_edge (e));
+
+          /* If a switch insn used this edge, fix the jump table.  */
+          hsa_bb *source = hsa_bb_for_bb (e->src);
+          hsa_insn_sbr *sbr;
+          if (source->m_last_insn
+              && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
+            sbr->replace_all_labels (old_dest, hbb->m_bb);
+        }
+
+      hsa_build_append_simple_mov (phi->m_dest, op, hbb);
+    }
+}
+
+/* Naive out-of-SSA.  */
+
+static void
+naive_outof_ssa (void)
+{
+  basic_block bb;
+
+  hsa_cfun->m_in_ssa = false;
+
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      hsa_bb *hbb = hsa_bb_for_bb (bb);
+      hsa_insn_phi *phi;
+
+      for (phi = hbb->m_first_phi;
+           phi;
+           phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
+        naive_process_phi (phi);
+
+      /* Zap the PHI nodes; they will be deallocated with everything else.  */
+      hbb->m_first_phi = NULL;
+      hbb->m_last_phi = NULL;
+    }
+}
+
+/* Return the register class number for the given HSA TYPE.  0 means the 'c'
+   one-bit register class, 1 means the 's' 32-bit class, 2 stands for the 'd'
+   64-bit class and 3 for the 'q' 128-bit class.  */
+
+static int
+m_reg_class_for_type (BrigType16_t type)
+{
+  switch (type)
+    {
+    case BRIG_TYPE_B1:
+      return 0;
+
+    case BRIG_TYPE_U8:
+    case BRIG_TYPE_U16:
+    case BRIG_TYPE_U32:
+    case BRIG_TYPE_S8:
+    case BRIG_TYPE_S16:
+    case BRIG_TYPE_S32:
+    case BRIG_TYPE_F16:
+    case BRIG_TYPE_F32:
+    case BRIG_TYPE_B8:
+    case BRIG_TYPE_B16:
+    case BRIG_TYPE_B32:
+    case BRIG_TYPE_U8X4:
+    case BRIG_TYPE_S8X4:
+    case BRIG_TYPE_U16X2:
+    case BRIG_TYPE_S16X2:
+    case BRIG_TYPE_F16X2:
+      return 1;
+
+    case BRIG_TYPE_U64:
+    case BRIG_TYPE_S64:
+    case BRIG_TYPE_F64:
+    case BRIG_TYPE_B64:
+    case BRIG_TYPE_U8X8:
+    case BRIG_TYPE_S8X8:
+    case BRIG_TYPE_U16X4:
+    case BRIG_TYPE_S16X4:
+    case BRIG_TYPE_F16X4:
+    case BRIG_TYPE_U32X2:
+    case BRIG_TYPE_S32X2:
+    case BRIG_TYPE_F32X2:
+      return 2;
+
+    case BRIG_TYPE_B128:
+    case BRIG_TYPE_U8X16:
+    case BRIG_TYPE_S8X16:
+    case BRIG_TYPE_U16X8:
+    case BRIG_TYPE_S16X8:
+    case BRIG_TYPE_F16X8:
+    case BRIG_TYPE_U32X4:
+    case BRIG_TYPE_U64X2:
+    case BRIG_TYPE_S32X4:
+    case BRIG_TYPE_S64X2:
+    case BRIG_TYPE_F32X4:
+    case BRIG_TYPE_F64X2:
+      return 3;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* If the Ith operand of INSN is or contains a register (in an address),
+   return the address of that register operand.  If not, return NULL.
*/ + +static hsa_op_reg ** +insn_reg_addr (hsa_insn_basic *insn, int i) +{ + hsa_op_base *op = insn->get_op (i); + if (!op) + return NULL; + hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op); + if (reg) + return (hsa_op_reg **) insn->get_op_addr (i); + hsa_op_address *addr = dyn_cast <hsa_op_address *> (op); + if (addr && addr->m_reg) + return &addr->m_reg; + return NULL; +} + +struct m_reg_class_desc +{ + unsigned next_avail, max_num; + unsigned used_num, max_used; + uint64_t used[2]; + char cl_char; +}; + +/* Rewrite the instructions in BB to observe spilled live ranges. + CLASSES is the global register class state. */ + +static void +rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes) +{ + hsa_bb *hbb = hsa_bb_for_bb (bb); + hsa_insn_basic *insn, *next_insn; + + for (insn = hbb->m_first_insn; insn; insn = next_insn) + { + next_insn = insn->m_next; + unsigned count = insn->operand_count (); + for (unsigned i = 0; i < count; i++) + { + gcc_checking_assert (insn->get_op (i)); + hsa_op_reg **regaddr = insn_reg_addr (insn, i); + + if (regaddr) + { + hsa_op_reg *reg = *regaddr; + if (reg->m_reg_class) + continue; + gcc_assert (reg->m_spill_sym); + + int cl = m_reg_class_for_type (reg->m_type); + hsa_op_reg *tmp, *tmp2; + if (insn->op_output_p (i)) + tmp = hsa_spill_out (insn, reg, &tmp2); + else + tmp = hsa_spill_in (insn, reg, &tmp2); + + *regaddr = tmp; + + tmp->m_reg_class = classes[cl].cl_char; + tmp->m_hard_num = (char) (classes[cl].max_num + i); + if (tmp2) + { + gcc_assert (cl == 0); + tmp2->m_reg_class = classes[1].cl_char; + tmp2->m_hard_num = (char) (classes[1].max_num + i); + } + } + } + } +} + +/* Dump current function to dump file F, with info specific + to register allocation. */ + +void +dump_hsa_cfun_regalloc (FILE *f) +{ + basic_block bb; + + fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name); + + FOR_ALL_BB_FN (bb, cfun) + { + hsa_bb *hbb = (struct hsa_bb *) bb->aux; + bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n"); + dump_hsa_bb (f, hbb); + bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n"); + } +} + +/* Given the global register allocation state CLASSES and a + register REG, try to give it a hardware register. If successful, + store that hardreg in REG and return it, otherwise return -1. + Also changes CLASSES to accommodate for the allocated register. */ + +static int +try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) +{ + int cl = m_reg_class_for_type (reg->m_type); + int ret = -1; + if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4 + >= 128 - 5) + return -1; + if (classes[cl].used_num < classes[cl].max_num) + { + unsigned int i; + classes[cl].used_num++; + if (classes[cl].used_num > classes[cl].max_used) + classes[cl].max_used = classes[cl].used_num; + for (i = 0; i < classes[cl].used_num; i++) + if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63)))) + break; + ret = i; + classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63)); + reg->m_reg_class = classes[cl].cl_char; + reg->m_hard_num = i; + } + return ret; +} + +/* Free up hardregs used by REG, into allocation state CLASSES. */ + +static void +free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) +{ + int cl = m_reg_class_for_type (reg->m_type); + int ret = reg->m_hard_num; + gcc_assert (reg->m_reg_class == classes[cl].cl_char); + classes[cl].used_num--; + classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63)); +} + +/* Note that the live range for REG ends at least at END. 
*/ + +static void +note_lr_end (hsa_op_reg *reg, int end) +{ + if (reg->m_lr_end < end) + reg->m_lr_end = end; +} + +/* Note that the live range for REG starts at least at BEGIN. */ + +static void +note_lr_begin (hsa_op_reg *reg, int begin) +{ + if (reg->m_lr_begin > begin) + reg->m_lr_begin = begin; +} + +/* Given two registers A and B, return -1, 0 or 1 if A's live range + starts before, at or after B's live range. */ + +static int +cmp_begin (const void *a, const void *b) +{ + const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a; + const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b; + int ret; + if (rega == regb) + return 0; + ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin; + if (ret) + return ret; + return ((*rega)->m_order - (*regb)->m_order); +} + +/* Given two registers REGA and REGB, return true if REGA's + live range ends after REGB's. This results in a sorting order + with earlier end points at the end. */ + +static bool +cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b) +{ + int ret; + if (rega == regb) + return false; + ret = (regb)->m_lr_end - (rega)->m_lr_end; + if (ret) + return ret < 0; + return (((regb)->m_order - (rega)->m_order)) < 0; +} + +/* Expire all old intervals in ACTIVE (a per-regclass vector), + that is, those that end before the interval REG starts. Give + back resources freed so into the state CLASSES. */ + +static void +expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active, + struct m_reg_class_desc *classes) +{ + for (int i = 0; i < 4; i++) + while (!active[i].is_empty ()) + { + hsa_op_reg *a = active[i].pop (); + if (a->m_lr_end > reg->m_lr_begin) + { + active[i].quick_push (a); + break; + } + free_reg (classes, a); + } +} + +/* The interval REG didn't get a hardreg. Spill it or one of those + from ACTIVE (if the latter, then REG will become allocated to the + hardreg that formerly was used by it). */ + +static void +spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active) +{ + int cl = m_reg_class_for_type (reg->m_type); + gcc_assert (!active[cl].is_empty ()); + hsa_op_reg *cand = active[cl][0]; + if (cand->m_lr_end > reg->m_lr_end) + { + reg->m_reg_class = cand->m_reg_class; + reg->m_hard_num = cand->m_hard_num; + active[cl].ordered_remove (0); + unsigned place = active[cl].lower_bound (reg, cmp_end); + active[cl].quick_insert (place, reg); + } + else + cand = reg; + + gcc_assert (!cand->m_spill_sym); + BrigType16_t type = cand->m_type; + if (type == BRIG_TYPE_B1) + type = BRIG_TYPE_U8; + cand->m_reg_class = 0; + cand->m_spill_sym = hsa_get_spill_symbol (type); + cand->m_spill_sym->m_name_number = cand->m_order; +} + +/* Given the global register state CLASSES allocate all HSA virtual + registers either to hardregs or to a spill symbol. */ + +static void +linear_scan_regalloc (struct m_reg_class_desc *classes) +{ + /* Compute liveness. */ + bool changed; + int i, n; + int insn_order; + int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + bitmap work = BITMAP_ALLOC (NULL); + vec<hsa_op_reg*> ind2reg = vNULL; + vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL}; + hsa_insn_basic *m_last_insn; + + /* We will need the reverse post order for linearization, + and the post order for liveness analysis, which is the same + backward. */ + n = pre_and_rev_post_order_compute (NULL, bbs, true); + ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count); + + /* Give all instructions a linearized number, at the same time + build a mapping from register index to register. 
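+     (An added note: with this numbering every live range can be kept as a
+     half-open interval of instruction numbers, e.g. reg3: [12, 27), which
+     is the form used by the linear scan below and printed in the dumps.)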
*/ + insn_order = 1; + for (i = 0; i < n; i++) + { + basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); + hsa_bb *hbb = hsa_bb_for_bb (bb); + hsa_insn_basic *insn; + for (insn = hbb->m_first_insn; insn; insn = insn->m_next) + { + unsigned opi; + insn->m_number = insn_order++; + for (opi = 0; opi < insn->operand_count (); opi++) + { + gcc_checking_assert (insn->get_op (opi)); + hsa_op_reg **regaddr = insn_reg_addr (insn, opi); + if (regaddr) + ind2reg[(*regaddr)->m_order] = *regaddr; + } + } + } + + /* Initialize all live ranges to [after-end, 0). */ + for (i = 0; i < hsa_cfun->m_reg_count; i++) + if (ind2reg[i]) + ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0; + + /* Classic liveness analysis, as long as something changes: + m_liveout is union (m_livein of successors) + m_livein is m_liveout minus defs plus uses. */ + do + { + changed = false; + for (i = n - 1; i >= 0; i--) + { + edge e; + edge_iterator ei; + basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); + hsa_bb *hbb = hsa_bb_for_bb (bb); + + /* Union of successors m_livein (or empty if none). */ + bool first = true; + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) + { + hsa_bb *succ = hsa_bb_for_bb (e->dest); + if (first) + { + bitmap_copy (work, succ->m_livein); + first = false; + } + else + bitmap_ior_into (work, succ->m_livein); + } + if (first) + bitmap_clear (work); + + bitmap_copy (hbb->m_liveout, work); + + /* Remove defs, include uses in a backward insn walk. */ + hsa_insn_basic *insn; + for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) + { + unsigned opi; + unsigned ndefs = insn->input_count (); + for (opi = 0; opi < ndefs && insn->get_op (opi); opi++) + { + gcc_checking_assert (insn->get_op (opi)); + hsa_op_reg **regaddr = insn_reg_addr (insn, opi); + if (regaddr) + bitmap_clear_bit (work, (*regaddr)->m_order); + } + for (; opi < insn->operand_count (); opi++) + { + gcc_checking_assert (insn->get_op (opi)); + hsa_op_reg **regaddr = insn_reg_addr (insn, opi); + if (regaddr) + bitmap_set_bit (work, (*regaddr)->m_order); + } + } + + /* Note if that changed something. */ + if (bitmap_ior_into (hbb->m_livein, work)) + changed = true; + } + } + while (changed); + + /* Make one pass through all instructions in linear order, + noting and merging possible live range start and end points. */ + m_last_insn = NULL; + for (i = n - 1; i >= 0; i--) + { + basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); + hsa_bb *hbb = hsa_bb_for_bb (bb); + hsa_insn_basic *insn; + int after_end_number; + unsigned bit; + bitmap_iterator bi; + + if (m_last_insn) + after_end_number = m_last_insn->m_number; + else + after_end_number = insn_order; + /* Everything live-out in this BB has at least an end point + after us. */ + EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi) + note_lr_end (ind2reg[bit], after_end_number); + + for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) + { + unsigned opi; + unsigned ndefs = insn->input_count (); + for (opi = 0; opi < insn->operand_count (); opi++) + { + gcc_checking_assert (insn->get_op (opi)); + hsa_op_reg **regaddr = insn_reg_addr (insn, opi); + if (regaddr) + { + hsa_op_reg *reg = *regaddr; + if (opi < ndefs) + note_lr_begin (reg, insn->m_number); + else + note_lr_end (reg, insn->m_number); + } + } + } + + /* Everything live-in in this BB has a start point before + our first insn. 
*/ + int before_start_number; + if (hbb->m_first_insn) + before_start_number = hbb->m_first_insn->m_number; + else + before_start_number = after_end_number; + before_start_number--; + EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi) + note_lr_begin (ind2reg[bit], before_start_number); + + if (hbb->m_first_insn) + m_last_insn = hbb->m_first_insn; + } + + for (i = 0; i < hsa_cfun->m_reg_count; i++) + if (ind2reg[i]) + { + /* All regs that have still their start at after all code actually + are defined at the start of the routine (prologue). */ + if (ind2reg[i]->m_lr_begin == insn_order) + ind2reg[i]->m_lr_begin = 0; + /* All regs that have no use but a def will have lr_end == 0, + they are actually live from def until after the insn they are + defined in. */ + if (ind2reg[i]->m_lr_end == 0) + ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1; + } + + /* Sort all intervals by increasing start point. */ + gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count); + +#ifdef ENABLE_CHECKING + for (unsigned i = 0; i < ind2reg.length (); i++) + gcc_assert (ind2reg[i]); +#endif + + ind2reg.qsort (cmp_begin); + for (i = 0; i < 4; i++) + active[i].reserve_exact (hsa_cfun->m_reg_count); + + /* Now comes the linear scan allocation. */ + for (i = 0; i < hsa_cfun->m_reg_count; i++) + { + hsa_op_reg *reg = ind2reg[i]; + if (!reg) + continue; + expire_old_intervals (reg, active, classes); + int cl = m_reg_class_for_type (reg->m_type); + if (try_alloc_reg (classes, reg) >= 0) + { + unsigned place = active[cl].lower_bound (reg, cmp_end); + active[cl].quick_insert (place, reg); + } + else + spill_at_interval (reg, active); + + /* Some interesting dumping as we go. */ + if (dump_file) + { + fprintf (dump_file, " reg%d: [%5d, %5d)->", + reg->m_order, reg->m_lr_begin, reg->m_lr_end); + if (reg->m_reg_class) + fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num); + else + fprintf (dump_file, "[%%__%s_%i]", + hsa_seg_name (reg->m_spill_sym->m_segment), + reg->m_spill_sym->m_name_number); + for (int cl = 0; cl < 4; cl++) + { + bool first = true; + hsa_op_reg *r; + fprintf (dump_file, " {"); + for (int j = 0; active[cl].iterate (j, &r); j++) + if (first) + { + fprintf (dump_file, "%d", r->m_order); + first = false; + } + else + fprintf (dump_file, ", %d", r->m_order); + fprintf (dump_file, "}"); + } + fprintf (dump_file, "\n"); + } + } + + BITMAP_FREE (work); + free (bbs); + + if (dump_file) + { + fprintf (dump_file, "------- After liveness: -------\n"); + dump_hsa_cfun_regalloc (dump_file); + fprintf (dump_file, " ----- Intervals:\n"); + for (i = 0; i < hsa_cfun->m_reg_count; i++) + { + hsa_op_reg *reg = ind2reg[i]; + if (!reg) + continue; + fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order, + reg->m_lr_begin, reg->m_lr_end); + if (reg->m_reg_class) + fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num); + else + fprintf (dump_file, "[%%__%s_%i]\n", + hsa_seg_name (reg->m_spill_sym->m_segment), + reg->m_spill_sym->m_name_number); + } + } + + for (i = 0; i < 4; i++) + active[i].release (); + ind2reg.release (); +} + +/* Entry point for register allocation. */ + +static void +regalloc (void) +{ + basic_block bb; + m_reg_class_desc classes[4]; + + /* If there are no registers used in the function, exit right away. 
*/ + if (hsa_cfun->m_reg_count == 0) + return; + + memset (classes, 0, sizeof (classes)); + classes[0].next_avail = 0; + classes[0].max_num = 7; + classes[0].cl_char = 'c'; + classes[1].cl_char = 's'; + classes[2].cl_char = 'd'; + classes[3].cl_char = 'q'; + + for (int i = 1; i < 4; i++) + { + classes[i].next_avail = 0; + classes[i].max_num = 20; + } + + linear_scan_regalloc (classes); + + FOR_ALL_BB_FN (bb, cfun) + rewrite_code_bb (bb, classes); +} + +/* Out of SSA and register allocation on HSAIL IL. */ + +void +hsa_regalloc (void) +{ + naive_outof_ssa (); + + if (dump_file) + { + fprintf (dump_file, "------- After out-of-SSA: -------\n"); + dump_hsa_cfun (dump_file); + } + + regalloc (); + + if (dump_file) + { + fprintf (dump_file, "------- After register allocation: -------\n"); + dump_hsa_cfun (dump_file); + } +} diff --git a/gcc/hsa.c b/gcc/hsa.c new file mode 100644 index 0000000..ec23f81 --- /dev/null +++ b/gcc/hsa.c @@ -0,0 +1,947 @@ +/* Implementation of commonly needed HSAIL related functions and methods. + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "is-a.h" +#include "hash-set.h" +#include "hash-map.h" +#include "vec.h" +#include "tree.h" +#include "dumpfile.h" +#include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "alloc-pool.h" +#include "cgraph.h" +#include "print-tree.h" +#include "stringpool.h" +#include "symbol-summary.h" +#include "hsa.h" +#include "internal-fn.h" +#include "ctype.h" + +/* Structure containing intermediate HSA representation of the generated + function. */ +class hsa_function_representation *hsa_cfun; + +/* Element of the mapping vector between a host decl and an HSA kernel. */ + +struct GTY(()) hsa_decl_kernel_map_element +{ + /* The decl of the host function. */ + tree decl; + /* Name of the HSA kernel in BRIG. */ + char * GTY((skip)) name; + /* Size of OMP data, if the kernel contains a kernel dispatch. */ + unsigned omp_data_size; + /* True if the function is gridified kernel. */ + bool gridified_kernel_p; +}; + +/* Mapping between decls and corresponding HSA kernels in this compilation + unit. */ + +static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> + *hsa_decl_kernel_mapping; + +/* Mapping between decls and corresponding HSA kernels + called by the function. */ +hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; + +/* Hash function to lookup a symbol for a decl. */ +hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; + +/* HSA summaries. */ +hsa_summary_t *hsa_summaries = NULL; + +/* HSA number of threads. */ +hsa_symbol *hsa_num_threads = NULL; + +/* HSA function that cannot be expanded to HSAIL. 
*/ +hash_set <tree> *hsa_failed_functions = NULL; + +/* True if compilation unit-wide data are already allocated and initialized. */ +static bool compilation_unit_data_initialized; + +/* Return true if FNDECL represents an HSA-callable function. */ + +bool +hsa_callable_function_p (tree fndecl) +{ + return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)) + && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl))); +} + +/* Allocate HSA structures that are used when dealing with different + functions. */ + +void +hsa_init_compilation_unit_data (void) +{ + if (compilation_unit_data_initialized) + return; + + compilation_unit_data_initialized = true; + + hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8); + hsa_failed_functions = new hash_set <tree> (); + hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); +} + +/* Free data structures that are used when dealing with different + functions. */ + +void +hsa_deinit_compilation_unit_data (void) +{ + gcc_assert (compilation_unit_data_initialized); + + delete hsa_failed_functions; + delete hsa_emitted_internal_decls; + + for (hash_table <hsa_noop_symbol_hasher>::iterator it + = hsa_global_variable_symbols->begin (); + it != hsa_global_variable_symbols->end (); + ++it) + { + hsa_symbol *sym = *it; + delete sym; + } + + delete hsa_global_variable_symbols; + + if (hsa_num_threads) + { + delete hsa_num_threads; + hsa_num_threads = NULL; + } + + compilation_unit_data_initialized = false; +} + +/* Return true if we are generating the large HSA machine model. */ + +bool +hsa_machine_large_p (void) +{ + /* FIXME: I suppose this is technically wrong but should work for me now. */ + return (GET_MODE_BITSIZE (Pmode) == 64); +} + +/* Return the HSA profile we are using. */ + +bool +hsa_full_profile_p (void) +{ + return true; +} + +/* Return true if a register in operand number OPNUM of the instruction + is an output. False if it is an input. */ + +bool +hsa_insn_basic::op_output_p (unsigned opnum) +{ + switch (m_opcode) + { + case HSA_OPCODE_PHI: + case BRIG_OPCODE_CBR: + case BRIG_OPCODE_SBR: + case BRIG_OPCODE_ST: + case BRIG_OPCODE_SIGNALNORET: + /* FIXME: There are probably missing cases here, double check. */ + return false; + case BRIG_OPCODE_EXPAND: + /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */ + return opnum < operand_count () - 1; + default: + return opnum == 0; + } +} + +/* Return true if OPCODE is a floating-point bit instruction opcode. */ + +bool +hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode) +{ + switch (opcode) + { + case BRIG_OPCODE_NEG: + case BRIG_OPCODE_ABS: + case BRIG_OPCODE_CLASS: + case BRIG_OPCODE_COPYSIGN: + return true; + default: + return false; + } +} + +/* Return the number of destination operands for this INSN. */ + +unsigned +hsa_insn_basic::input_count () +{ + switch (m_opcode) + { + default: + return 1; + + case BRIG_OPCODE_NOP: + return 0; + + case BRIG_OPCODE_EXPAND: + return 2; + + case BRIG_OPCODE_LD: + /* ld_v[234] not yet handled. 
*/ + return 1; + + case BRIG_OPCODE_ST: + return 0; + + case BRIG_OPCODE_ATOMICNORET: + return 0; + + case BRIG_OPCODE_SIGNAL: + return 1; + + case BRIG_OPCODE_SIGNALNORET: + return 0; + + case BRIG_OPCODE_MEMFENCE: + return 0; + + case BRIG_OPCODE_RDIMAGE: + case BRIG_OPCODE_LDIMAGE: + case BRIG_OPCODE_STIMAGE: + case BRIG_OPCODE_QUERYIMAGE: + case BRIG_OPCODE_QUERYSAMPLER: + sorry ("HSA image ops not handled"); + return 0; + + case BRIG_OPCODE_CBR: + case BRIG_OPCODE_BR: + return 0; + + case BRIG_OPCODE_SBR: + return 0; /* ??? */ + + case BRIG_OPCODE_WAVEBARRIER: + return 0; /* ??? */ + + case BRIG_OPCODE_BARRIER: + case BRIG_OPCODE_ARRIVEFBAR: + case BRIG_OPCODE_INITFBAR: + case BRIG_OPCODE_JOINFBAR: + case BRIG_OPCODE_LEAVEFBAR: + case BRIG_OPCODE_RELEASEFBAR: + case BRIG_OPCODE_WAITFBAR: + return 0; + + case BRIG_OPCODE_LDF: + return 1; + + case BRIG_OPCODE_ACTIVELANECOUNT: + case BRIG_OPCODE_ACTIVELANEID: + case BRIG_OPCODE_ACTIVELANEMASK: + case BRIG_OPCODE_ACTIVELANEPERMUTE: + return 1; /* ??? */ + + case BRIG_OPCODE_CALL: + case BRIG_OPCODE_SCALL: + case BRIG_OPCODE_ICALL: + return 0; + + case BRIG_OPCODE_RET: + return 0; + + case BRIG_OPCODE_ALLOCA: + return 1; + + case BRIG_OPCODE_CLEARDETECTEXCEPT: + return 0; + + case BRIG_OPCODE_SETDETECTEXCEPT: + return 0; + + case BRIG_OPCODE_PACKETCOMPLETIONSIG: + case BRIG_OPCODE_PACKETID: + case BRIG_OPCODE_CASQUEUEWRITEINDEX: + case BRIG_OPCODE_LDQUEUEREADINDEX: + case BRIG_OPCODE_LDQUEUEWRITEINDEX: + case BRIG_OPCODE_STQUEUEREADINDEX: + case BRIG_OPCODE_STQUEUEWRITEINDEX: + return 1; /* ??? */ + + case BRIG_OPCODE_ADDQUEUEWRITEINDEX: + return 1; + + case BRIG_OPCODE_DEBUGTRAP: + return 0; + + case BRIG_OPCODE_GROUPBASEPTR: + case BRIG_OPCODE_KERNARGBASEPTR: + return 1; /* ??? */ + + case HSA_OPCODE_ARG_BLOCK: + return 0; + + case BRIG_KIND_DIRECTIVE_COMMENT: + return 0; + } +} + +/* Return the number of source operands for this INSN. */ + +unsigned +hsa_insn_basic::num_used_ops () +{ + gcc_checking_assert (input_count () <= operand_count ()); + + return operand_count () - input_count (); +} + +/* Set alignment to VALUE. */ + +void +hsa_insn_mem::set_align (BrigAlignment8_t value) +{ + /* TODO: Perhaps remove this dump later on: */ + if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align) + { + fprintf (dump_file, "Decreasing alignment to %u in instruction ", value); + dump_hsa_insn (dump_file, this); + } + m_align = value; +} + +/* Return size of HSA type T in bits. 
*/ + +unsigned +hsa_type_bit_size (BrigType16_t t) +{ + switch (t) + { + case BRIG_TYPE_B1: + return 1; + + case BRIG_TYPE_U8: + case BRIG_TYPE_S8: + case BRIG_TYPE_B8: + return 8; + + case BRIG_TYPE_U16: + case BRIG_TYPE_S16: + case BRIG_TYPE_B16: + case BRIG_TYPE_F16: + return 16; + + case BRIG_TYPE_U32: + case BRIG_TYPE_S32: + case BRIG_TYPE_B32: + case BRIG_TYPE_F32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_U16X2: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_F16X2: + return 32; + + case BRIG_TYPE_U64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F64: + case BRIG_TYPE_B64: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + return 64; + + case BRIG_TYPE_B128: + case BRIG_TYPE_U8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + return 128; + + default: + gcc_assert (hsa_seen_error ()); + return t; + } +} + +/* Return BRIG bit-type with BITSIZE length. */ + +BrigType16_t +hsa_bittype_for_bitsize (unsigned bitsize) +{ + switch (bitsize) + { + case 1: + return BRIG_TYPE_B1; + case 8: + return BRIG_TYPE_B8; + case 16: + return BRIG_TYPE_B16; + case 32: + return BRIG_TYPE_B32; + case 64: + return BRIG_TYPE_B64; + case 128: + return BRIG_TYPE_B128; + default: + gcc_unreachable (); + } +} + +/* Return BRIG unsigned int type with BITSIZE length. */ + +BrigType16_t +hsa_uint_for_bitsize (unsigned bitsize) +{ + switch (bitsize) + { + case 8: + return BRIG_TYPE_U8; + case 16: + return BRIG_TYPE_U16; + case 32: + return BRIG_TYPE_U32; + case 64: + return BRIG_TYPE_U64; + default: + gcc_unreachable (); + } +} + +/* Return BRIG float type with BITSIZE length. */ + +BrigType16_t +hsa_float_for_bitsize (unsigned bitsize) +{ + switch (bitsize) + { + case 16: + return BRIG_TYPE_F16; + case 32: + return BRIG_TYPE_F32; + case 64: + return BRIG_TYPE_F64; + default: + gcc_unreachable (); + } +} + +/* Return HSA bit-type with the same size as the type T. */ + +BrigType16_t +hsa_bittype_for_type (BrigType16_t t) +{ + return hsa_bittype_for_bitsize (hsa_type_bit_size (t)); +} + +/* Return true if and only if TYPE is a floating point number type. */ + +bool +hsa_type_float_p (BrigType16_t type) +{ + switch (type & BRIG_TYPE_BASE_MASK) + { + case BRIG_TYPE_F16: + case BRIG_TYPE_F32: + case BRIG_TYPE_F64: + return true; + default: + return false; + } +} + +/* Return true if and only if TYPE is an integer number type. */ + +bool +hsa_type_integer_p (BrigType16_t type) +{ + switch (type & BRIG_TYPE_BASE_MASK) + { + case BRIG_TYPE_U8: + case BRIG_TYPE_U16: + case BRIG_TYPE_U32: + case BRIG_TYPE_U64: + case BRIG_TYPE_S8: + case BRIG_TYPE_S16: + case BRIG_TYPE_S32: + case BRIG_TYPE_S64: + return true; + default: + return false; + } +} + +/* Return true if and only if TYPE is a bit-type. */ + +bool +hsa_btype_p (BrigType16_t type) +{ + switch (type & BRIG_TYPE_BASE_MASK) + { + case BRIG_TYPE_B8: + case BRIG_TYPE_B16: + case BRIG_TYPE_B32: + case BRIG_TYPE_B64: + case BRIG_TYPE_B128: + return true; + default: + return false; + } +}
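
/* Editorial sketch, not part of the original patch: a minimal illustration
   of how the conversion helpers above compose.  The function name below is
   hypothetical and the body only documents expected behavior.  */

static inline void
hsa_type_helpers_example (void)
{
  BrigType16_t t = BRIG_TYPE_F32;
  unsigned bits = hsa_type_bit_size (t);	     /* 32.  */
  BrigType16_t bt = hsa_bittype_for_bitsize (bits);  /* BRIG_TYPE_B32.  */
  /* hsa_bittype_for_type is exactly this composition.  */
  gcc_assert (bt == hsa_bittype_for_type (t));
}


/* Return the HSA alignment encoding for an alignment of N bits.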
*/ + +BrigAlignment8_t +hsa_alignment_encoding (unsigned n) +{ + gcc_assert (n >= 8 && !(n & (n - 1))); + if (n >= 256) + return BRIG_ALIGNMENT_32; + + switch (n) + { + case 8: + return BRIG_ALIGNMENT_1; + case 16: + return BRIG_ALIGNMENT_2; + case 32: + return BRIG_ALIGNMENT_4; + case 64: + return BRIG_ALIGNMENT_8; + case 128: + return BRIG_ALIGNMENT_16; + default: + gcc_unreachable (); + } +} + +/* Return natural alignment of HSA TYPE. */ + +BrigAlignment8_t +hsa_natural_alignment (BrigType16_t type) +{ + return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY)); +} + +/* Call the correct destructor of an HSA instruction. */ + +void +hsa_destroy_insn (hsa_insn_basic *insn) +{ + if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn)) + phi->~hsa_insn_phi (); + else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) + br->~hsa_insn_br (); + else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) + cmp->~hsa_insn_cmp (); + else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) + mem->~hsa_insn_mem (); + else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn)) + atomic->~hsa_insn_atomic (); + else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) + seg->~hsa_insn_seg (); + else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) + call->~hsa_insn_call (); + else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) + block->~hsa_insn_arg_block (); + else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) + sbr->~hsa_insn_sbr (); + else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) + comment->~hsa_insn_comment (); + else + insn->~hsa_insn_basic (); +} + +/* Call the correct destructor of an HSA operand. */ + +void +hsa_destroy_operand (hsa_op_base *op) +{ + if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op)) + list->~hsa_op_code_list (); + else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op)) + list->~hsa_op_operand_list (); + else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) + reg->~hsa_op_reg (); + else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op)) + immed->~hsa_op_immed (); + else + op->~hsa_op_base (); +} + +/* Create a mapping between the original function DECL and kernel name NAME. */ + +void +hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, + bool gridified_kernel_p) +{ + hsa_decl_kernel_map_element dkm; + dkm.decl = decl; + dkm.name = name; + dkm.omp_data_size = omp_data_size; + dkm.gridified_kernel_p = gridified_kernel_p; + vec_safe_push (hsa_decl_kernel_mapping, dkm); +} + +/* Return the number of kernel decl name mappings. */ + +unsigned +hsa_get_number_decl_kernel_mappings (void) +{ + return vec_safe_length (hsa_decl_kernel_mapping); +} + +/* Return the decl in the Ith kernel decl name mapping. */ + +tree +hsa_get_decl_kernel_mapping_decl (unsigned i) +{ + return (*hsa_decl_kernel_mapping)[i].decl; +} + +/* Return the name in the Ith kernel decl name mapping. */ + +char * +hsa_get_decl_kernel_mapping_name (unsigned i) +{ + return (*hsa_decl_kernel_mapping)[i].name; +} + +/* Return the OMP data size in the Ith kernel decl name mapping. */ + +unsigned +hsa_get_decl_kernel_mapping_omp_size (unsigned i) +{ + return (*hsa_decl_kernel_mapping)[i].omp_data_size; +} + +/* Return true if the function in the Ith kernel decl name mapping is a + gridified kernel. 
*/ + +bool +hsa_get_decl_kernel_mapping_gridified (unsigned i) +{ + return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; +} + +/* Free the mapping between original decls and kernel names. */ + +void +hsa_free_decl_kernel_mapping (void) +{ + if (hsa_decl_kernel_mapping == NULL) + return; + + for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i) + free ((*hsa_decl_kernel_mapping)[i].name); + ggc_free (hsa_decl_kernel_mapping); +} + +/* Add new kernel dependency. */ + +void +hsa_add_kernel_dependency (tree caller, const char *called_function) +{ + if (hsa_decl_kernel_dependencies == NULL) + hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> (); + + vec <const char *> *s = NULL; + vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller); + if (slot == NULL) + { + s = new vec <const char *> (); + hsa_decl_kernel_dependencies->put (caller, s); + } + else + s = *slot; + + s->safe_push (called_function); +} + +/* Modify the name P in-place so that it is a valid HSA identifier. */ + +void +hsa_sanitize_name (char *p) +{ + for (; *p; p++) + if (*p == '.' || *p == '-') + *p = '_'; +} + +/* Clone the name P, prepend an ampersand and sanitize the name. */ + +char * +hsa_brig_function_name (const char *p) +{ + unsigned len = strlen (p); + char *buf = XNEWVEC (char, len + 2); + + buf[0] = '&'; + buf[len + 1] = '\0'; + memcpy (buf + 1, p, len); + + hsa_sanitize_name (buf); + return buf; +} + +/* Return the declaration name if it exists, or create an artificial name + otherwise. */ + +const char * +hsa_get_declaration_name (tree decl) +{ + if (!DECL_NAME (decl)) + { + char buf[64]; + snprintf (buf, 64, "__hsa_anonymous_%i", DECL_UID (decl)); + const char *ggc_str = ggc_strdup (buf); + return ggc_str; + } + + tree name_tree; + if (TREE_CODE (decl) == FUNCTION_DECL + || (TREE_CODE (decl) == VAR_DECL && is_global_var (decl))) + name_tree = DECL_ASSEMBLER_NAME (decl); + else + name_tree = DECL_NAME (decl); + + const char *name = IDENTIFIER_POINTER (name_tree); + /* User-specified assembler names have a prepended asterisk symbol. */ + if (name[0] == '*') + name++; + + return name; +} + +/* Couple GPU and HOST as gpu-specific and host-specific implementation of + the same function. KIND determines whether GPU is a host-invokable kernel + or gpu-callable function and GRIDIFIED_KERNEL_P is set if the function was + gridified in OMP. */ + +void +hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, + hsa_function_kind kind, bool gridified_kernel_p) +{ + hsa_function_summary *gpu_summary = get (gpu); + hsa_function_summary *host_summary = get (host); + + gpu_summary->m_kind = kind; + host_summary->m_kind = kind; + + gpu_summary->m_gpu_implementation_p = true; + host_summary->m_gpu_implementation_p = false; + + gpu_summary->m_gridified_kernel_p = gridified_kernel_p; + host_summary->m_gridified_kernel_p = gridified_kernel_p; + + gpu_summary->m_binded_function = host; + host_summary->m_binded_function = gpu; + + tree gdecl = gpu->decl; + DECL_ATTRIBUTES (gdecl) + = tree_cons (get_identifier ("flatten"), NULL_TREE, + DECL_ATTRIBUTES (gdecl)); + + tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); + if (fn_opts == NULL_TREE) + fn_opts = optimization_default_node; + fn_opts = copy_node (fn_opts); + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; + TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; + DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; +} + +/* Add a HOST function to HSA summaries. */ + +void +hsa_register_kernel (cgraph_node *host) +{ + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + hsa_function_summary *s = hsa_summaries->get (host); + s->m_kind = HSA_KERNEL; +}
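
/* Editorial sketch, not part of the original patch: hsa_brig_function_name
   above prepends an ampersand and then replaces characters that are invalid
   in HSAIL identifiers.  The function name below is hypothetical.  */

static inline void
hsa_name_mangling_example (void)
{
  /* '.' and '-' are not valid in HSAIL identifiers.  */
  char *name = hsa_brig_function_name ("foo.bar-baz");
  gcc_assert (strcmp (name, "&foo_bar_baz") == 0);
  /* XNEWVEC allocates with xmalloc, so plain free releases it.  */
  free (name);
}


/* Add a pair of functions to HSA summaries.  GPU is an HSA implementation of
   a HOST function.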
*/ + +void +hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) +{ + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); +} + +/* Return true if expansion of the current HSA function has already failed. */ + +bool +hsa_seen_error (void) +{ + return hsa_cfun->m_seen_error; +} + +/* Mark current HSA function as failed. */ + +void +hsa_fail_cfun (void) +{ + hsa_failed_functions->add (hsa_cfun->m_decl); + hsa_cfun->m_seen_error = true; +} + +char * +hsa_internal_fn::name () +{ + char *name = xstrdup (internal_fn_name (m_fn)); + for (char *ptr = name; *ptr; ptr++) + *ptr = TOLOWER (*ptr); + + const char *suffix = NULL; + if (m_type_bit_size == 32) + suffix = "f"; + + if (suffix) + { + char *name2 = concat (name, suffix, NULL); + free (name); + name = name2; + } + + hsa_sanitize_name (name); + return name; +} + +unsigned +hsa_internal_fn::get_arity () +{ + switch (m_fn) + { + case IFN_ACOS: + case IFN_ASIN: + case IFN_ATAN: + case IFN_COS: + case IFN_EXP: + case IFN_EXP10: + case IFN_EXP2: + case IFN_EXPM1: + case IFN_LOG: + case IFN_LOG10: + case IFN_LOG1P: + case IFN_LOG2: + case IFN_LOGB: + case IFN_SIGNIFICAND: + case IFN_SIN: + case IFN_SQRT: + case IFN_TAN: + case IFN_CEIL: + case IFN_FLOOR: + case IFN_NEARBYINT: + case IFN_RINT: + case IFN_ROUND: + case IFN_TRUNC: + return 1; + case IFN_ATAN2: + case IFN_COPYSIGN: + case IFN_FMOD: + case IFN_POW: + case IFN_REMAINDER: + case IFN_SCALB: + case IFN_LDEXP: + return 2; + break; + case IFN_CLRSB: + case IFN_CLZ: + case IFN_CTZ: + case IFN_FFS: + case IFN_PARITY: + case IFN_POPCOUNT: + default: + /* As we produce sorry message for unknown internal functions, + reaching this label is definitely a bug. */ + gcc_unreachable (); + } +} + +BrigType16_t +hsa_internal_fn::get_argument_type (int n) +{ + switch (m_fn) + { + case IFN_ACOS: + case IFN_ASIN: + case IFN_ATAN: + case IFN_COS: + case IFN_EXP: + case IFN_EXP10: + case IFN_EXP2: + case IFN_EXPM1: + case IFN_LOG: + case IFN_LOG10: + case IFN_LOG1P: + case IFN_LOG2: + case IFN_LOGB: + case IFN_SIGNIFICAND: + case IFN_SIN: + case IFN_SQRT: + case IFN_TAN: + case IFN_CEIL: + case IFN_FLOOR: + case IFN_NEARBYINT: + case IFN_RINT: + case IFN_ROUND: + case IFN_TRUNC: + case IFN_ATAN2: + case IFN_COPYSIGN: + case IFN_FMOD: + case IFN_POW: + case IFN_REMAINDER: + case IFN_SCALB: + return hsa_float_for_bitsize (m_type_bit_size); + case IFN_LDEXP: + { + if (n == -1 || n == 0) + return hsa_float_for_bitsize (m_type_bit_size); + else + return BRIG_TYPE_S32; + } + default: + /* As we produce sorry message for unknown internal functions, + reaching this label is definitely a bug. */ + gcc_unreachable (); + } +} + +#include "gt-hsa.h" diff --git a/gcc/hsa.h b/gcc/hsa.h new file mode 100644 index 0000000..f0436f3 --- /dev/null +++ b/gcc/hsa.h @@ -0,0 +1,1402 @@ +/* HSAIL and BRIG related macros and definitions. + Copyright (C) 2013-2016 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef HSA_H +#define HSA_H + +#include "hsa-brig-format.h" +#include "is-a.h" +#include "predict.h" +#include "tree.h" +#include "vec.h" +#include "hash-table.h" +#include "basic-block.h" + + +/* Return true if the compiler should produce HSAIL. */ + +static inline bool +hsa_gen_requested_p (void) +{ +#ifndef ENABLE_HSA + return false; +#endif + return !flag_disable_hsa; +} + +/* Standard warning message if we failed to generate HSAIL for a function. */ + +#define HSA_SORRY_MSG "could not emit HSAIL for the function" + +class hsa_op_immed; +class hsa_op_cst_list; +class hsa_insn_basic; +class hsa_op_address; +class hsa_op_reg; +class hsa_bb; +typedef hsa_insn_basic *hsa_insn_basic_p; + +/* Class representing an input argument, output argument (result) or a + variable, that will eventually end up being a symbol directive. */ + +struct hsa_symbol +{ + /* Constructor. */ + hsa_symbol (BrigType16_t type, BrigSegment8_t segment, + BrigLinkage8_t linkage, bool global_scope_p = false, + BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC); + + /* Return total size of the symbol. */ + unsigned HOST_WIDE_INT total_byte_size (); + + /* Fill in those values into the symbol according to DECL, which are + determined independently from whether it is parameter, result, + or a variable, local or global. */ + void fillup_for_decl (tree decl); + + /* Pointer to the original tree, which is PARM_DECL for input parameters and + RESULT_DECL for the output parameters. */ + tree m_decl; + + /* Name of the symbol, that will be written into output and dumps. Can be + NULL, see name_number below. */ + const char *m_name; + + /* If name is NULL, artificial name will be formed from the segment name and + this number. */ + int m_name_number; + + /* Once written, this is the offset of the associated symbol directive. Zero + means the symbol has not been written yet. */ + unsigned m_directive_offset; + + /* HSA type of the parameter. */ + BrigType16_t m_type; + + /* The HSA segment this will eventually end up in. */ + BrigSegment8_t m_segment; + + /* The HSA kind of linkage. */ + BrigLinkage8_t m_linkage; + + /* Array dimension, if non-zero. */ + unsigned HOST_WIDE_INT m_dim; + + /* Constant value, used for string constants. */ + hsa_op_immed *m_cst_value; + + /* Is in global scope. */ + bool m_global_scope_p; + + /* True if an error has been seen for the symbol. */ + bool m_seen_error; + + /* Symbol allocation. */ + BrigAllocation m_allocation; + +private: + /* Default constructor. */ + hsa_symbol (); +}; + +/* Abstract class for HSA instruction operands. */ + +class hsa_op_base +{ +public: + /* Next operand scheduled to be written when writing BRIG operand + section. */ + hsa_op_base *m_next; + + /* Offset to which the associated operand structure will be written. Zero if + yet not scheduled for writing. */ + unsigned m_brig_op_offset; + + /* The type of a particular operand. */ + BrigKind16_t m_kind; + +protected: + hsa_op_base (BrigKind16_t k); +private: + /* Make the default constructor inaccessible. */ + hsa_op_base () {} +}; + +/* Common abstract ancestor for operands which have a type. */ + +class hsa_op_with_type : public hsa_op_base +{ +public: + /* The type. */ + BrigType16_t m_type; + + /* Convert an operand to a destination type DTYPE and attach insns + to HBB if needed. 
*/ + hsa_op_with_type *get_in_type (BrigType16_t dtype, hsa_bb *hbb); + +protected: + hsa_op_with_type (BrigKind16_t k, BrigType16_t t); +private: + /* Make the default constructor inaccessible. */ + hsa_op_with_type () : hsa_op_base (BRIG_KIND_NONE) {} +}; + +/* An immediate HSA operand. */ + +class hsa_op_immed : public hsa_op_with_type +{ +public: + hsa_op_immed (tree tree_val, bool min32int = true); + hsa_op_immed (HOST_WIDE_INT int_value, BrigType16_t type); + void *operator new (size_t); + ~hsa_op_immed (); + void set_type (BrigKind16_t t); + + /* Value as represented by middle end. */ + tree m_tree_value; + + /* Integer value representation. */ + HOST_WIDE_INT m_int_value; + + /* Brig data representation. */ + char *m_brig_repr; + + /* Brig data representation size in bytes. */ + unsigned m_brig_repr_size; + +private: + /* Make the default constructor inaccessible. */ + hsa_op_immed (); + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} + void emit_to_buffer (tree value); +}; + +/* Report whether or not P is an immediate operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_immed *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_CONSTANT_BYTES; +} + +/* HSA register operand. */ + +class hsa_op_reg : public hsa_op_with_type +{ + friend class hsa_insn_basic; + friend class hsa_insn_phi; +public: + hsa_op_reg (BrigType16_t t); + void *operator new (size_t); + + /* Verify register operand. */ + void verify_ssa (); + + /* If NON-NULL, gimple SSA that we come from. NULL if none. */ + tree m_gimple_ssa; + + /* Defining instruction while still in the SSA. */ + hsa_insn_basic *m_def_insn; + + /* If the register allocator decides to spill the register, this is the + appropriate spill symbol. */ + hsa_symbol *m_spill_sym; + + /* Number of this register structure in the order in which they were + allocated. */ + int m_order; + int m_lr_begin, m_lr_end; + + /* Zero if the register is not yet allocated. After allocation, this must + be 'c', 's', 'd' or 'q'. */ + char m_reg_class; + /* If allocated, the number of the HW register (within its HSA register + class). */ + char m_hard_num; + +private: + /* Make the default constructor inaccessible. */ + hsa_op_reg () : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} + /* Set definition where the register is defined. */ + void set_definition (hsa_insn_basic *insn); + /* Uses of the value while still in SSA. */ + auto_vec <hsa_insn_basic_p> m_uses; +}; + +typedef class hsa_op_reg *hsa_op_reg_p; + +/* Report whether or not P is a register operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_reg *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_REGISTER; +} + +/* Report whether or not P is a register operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_reg *>::test (hsa_op_with_type *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_REGISTER; +} + +/* An address HSA operand. */ + +class hsa_op_address : public hsa_op_base +{ +public: + /* Set up a new address operand consisting of base symbol SYM, register REG + and immediate OFFSET. If the machine model is not large and the offset is + 64 bit, the upper 32 bits have to be zero. 
*/ + hsa_op_address (hsa_symbol *sym, hsa_op_reg *reg, + HOST_WIDE_INT offset = 0); + + void *operator new (size_t); + + /* Set up a new address operand consisting of base symbol SYM and + immediate OFFSET. If the machine model is not large and the offset is + 64 bit, the upper 32 bits have to be zero. */ + hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset = 0); + + /* Set up a new address operand consisting of register REG and + immediate OFFSET. If the machine model is not large and the offset is + 64 bit, the upper 32 bits have to be zero. */ + hsa_op_address (hsa_op_reg *reg, HOST_WIDE_INT offset = 0); + + /* Symbol base of the address. Can be NULL if there is none. */ + hsa_symbol *m_symbol; + + /* Register offset. Can be NULL if there is none. */ + hsa_op_reg *m_reg; + + /* Immediate byte offset. */ + HOST_WIDE_INT m_imm_offset; + +private: + /* Make the default constructor inaccessible. */ + hsa_op_address () : hsa_op_base (BRIG_KIND_NONE) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is an address operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_address *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_ADDRESS; +} + +/* A reference to code HSA operand. It can be either a reference + to the start of a BB or the start of a function. */ + +class hsa_op_code_ref : public hsa_op_base +{ +public: + hsa_op_code_ref (); + + /* Offset in the code section that this refers to. */ + unsigned m_directive_offset; +}; + +/* Report whether or not P is a code reference operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_code_ref *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_CODE_REF; +} + +/* Code list HSA operand. */ + +class hsa_op_code_list: public hsa_op_base +{ +public: + hsa_op_code_list (unsigned elements); + void *operator new (size_t); + + /* Offset to variable-sized array in hsa_data section, where + offsets to entries in the hsa_code section are stored. */ + auto_vec<unsigned> m_offsets; +private: + /* Make the default constructor inaccessible. */ + hsa_op_code_list () : hsa_op_base (BRIG_KIND_NONE) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a code list operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_code_list *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_CODE_LIST; +} + +/* Operand list HSA operand. */ + +class hsa_op_operand_list: public hsa_op_base +{ +public: + hsa_op_operand_list (unsigned elements); + ~hsa_op_operand_list (); + void *operator new (size_t); + + /* Offset to variable-sized array in hsa_data section, where + offsets to entries in the hsa_code section are stored. */ + auto_vec<unsigned> m_offsets; +private: + /* Make the default constructor inaccessible. */ + hsa_op_operand_list () : hsa_op_base (BRIG_KIND_NONE) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is an operand list operand. */ + +template <> +template <> +inline bool +is_a_helper <hsa_op_operand_list *>::test (hsa_op_base *p) +{ + return p->m_kind == BRIG_KIND_OPERAND_OPERAND_LIST; +} + +/* Opcodes of instructions that are not part of HSA but that we use to + represent it nevertheless. 
*/ + +#define HSA_OPCODE_PHI (-1) +#define HSA_OPCODE_ARG_BLOCK (-2) + +/* The number of operand pointers we can store directly in an instruction. */ +#define HSA_BRIG_INT_STORAGE_OPERANDS 5 + +/* Class representing an HSA instruction. Unlike typical ancestors for + specialized classes, this one is also directly used for all instructions + that are then represented as BrigInstBasic. */ + +class hsa_insn_basic +{ +public: + hsa_insn_basic (unsigned nops, int opc); + hsa_insn_basic (unsigned nops, int opc, BrigType16_t t, + hsa_op_base *arg0 = NULL, + hsa_op_base *arg1 = NULL, + hsa_op_base *arg2 = NULL, + hsa_op_base *arg3 = NULL); + + void *operator new (size_t); + void set_op (int index, hsa_op_base *op); + hsa_op_base *get_op (int index); + hsa_op_base **get_op_addr (int index); + unsigned int operand_count (); + void verify (); + unsigned input_count (); + unsigned num_used_ops (); + void set_output_in_type (hsa_op_reg *dest, unsigned op_index, hsa_bb *hbb); + bool op_output_p (unsigned opnum); + + /* The previous and next instruction in the basic block. */ + hsa_insn_basic *m_prev, *m_next; + + /* Basic block this instruction belongs to. */ + basic_block m_bb; + + /* Operand code distinguishing different types of instructions. Eventually + these should only be BRIG_INST_* values from the BrigOpcode16_t range but + initially we use negative values for PHI nodes and such. */ + int m_opcode; + + /* Linearized number assigned to the instruction by HSA RA. */ + int m_number; + + /* Type of the destination of the operations. */ + BrigType16_t m_type; + + /* BRIG offset of the instruction in code section. */ + unsigned int m_brig_offset; + +private: + /* Make the default constructor inaccessible. */ + hsa_insn_basic () {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} + /* The individual operands. All instructions but PHI nodes have five or + fewer operands and so will fit the internal storage. */ + /* TODO: Vast majority of instructions have three or fewer operands, so we + may actually try reducing it. */ + auto_vec<hsa_op_base *, HSA_BRIG_INT_STORAGE_OPERANDS> m_operands; +}; + +/* Class representing a PHI node of the SSA form of HSA virtual + registers. */ + +class hsa_insn_phi : public hsa_insn_basic +{ +public: + hsa_insn_phi (unsigned nops, hsa_op_reg *dst); + + void *operator new (size_t); + + /* Destination. */ + hsa_op_reg *m_dest; + +private: + /* Make the default constructor inaccessible. */ + hsa_insn_phi () : hsa_insn_basic (1, HSA_OPCODE_PHI) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a PHI node. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_phi *>::test (hsa_insn_basic *p) +{ + return p->m_opcode == HSA_OPCODE_PHI; +} + +/* HSA instruction for branches. Currently we explicitly represent only + conditional branches. */ + +class hsa_insn_br : public hsa_insn_basic +{ +public: + hsa_insn_br (hsa_op_reg *ctrl); + + void *operator new (size_t); + + /* Width as described in HSA documentation. */ + BrigWidth8_t m_width; +private: + /* Make the default constructor inaccessible. */ + hsa_insn_br () : hsa_insn_basic (1, BRIG_OPCODE_CBR) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +};
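
/* Editorial sketch, not part of the original patch: the is_a_helper
   specializations in this header dispatch on m_opcode, so generic code can
   down-cast instructions with dyn_cast.  The helper name below is
   hypothetical.  */

static inline hsa_op_reg *
hsa_example_phi_dest (hsa_insn_basic *insn)
{
  /* Return the destination register if INSN is a PHI node, NULL otherwise.  */
  if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn))
    return phi->m_dest;
  return NULL;
}


/* Report whether P is a branching instruction.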
*/ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_br *>::test (hsa_insn_basic *p) +{ + return p->m_opcode == BRIG_OPCODE_BR + || p->m_opcode == BRIG_OPCODE_CBR; +} + +/* HSA instruction for switch branches. */ + +class hsa_insn_sbr : public hsa_insn_basic +{ +public: + hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count); + + /* Default destructor. */ + ~hsa_insn_sbr (); + + void *operator new (size_t); + + void replace_all_labels (basic_block old_bb, basic_block new_bb); + + /* Width as described in HSA documentation. */ + BrigWidth8_t m_width; + + /* Jump table. */ + vec <basic_block> m_jump_table; + + /* Default label basic block. */ + basic_block m_default_bb; + + /* Code list for label references. */ + hsa_op_code_list *m_label_code_list; + +private: + /* Make the default constructor inaccessible. */ + hsa_insn_sbr () : hsa_insn_basic (1, BRIG_OPCODE_SBR) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether P is a switch branching instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_sbr *>::test (hsa_insn_basic *p) +{ + return p->m_opcode == BRIG_OPCODE_SBR; +} + +/* HSA instruction for comparisons. */ + +class hsa_insn_cmp : public hsa_insn_basic +{ +public: + hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t, + hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, + hsa_op_base *arg2 = NULL); + + void *operator new (size_t); + + /* Source type should be derived from operand types. */ + + /* The comparison operation. */ + BrigCompareOperation8_t m_compare; + + /* TODO: Modifiers and packing control are missing but so are everywhere + else. */ +private: + /* Make the default constructor inaccessible. */ + hsa_insn_cmp () : hsa_insn_basic (1, BRIG_OPCODE_CMP) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a comparison instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_cmp *>::test (hsa_insn_basic *p) +{ + return p->m_opcode == BRIG_OPCODE_CMP; +} + +/* HSA instruction for memory operations. */ + +class hsa_insn_mem : public hsa_insn_basic +{ +public: + hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0, hsa_op_base *arg1); + + void *operator new (size_t); + + /* Set alignment to VALUE. */ + + void set_align (BrigAlignment8_t value); + + /* The segment of the memory access is either the segment of the symbol in + the address operand, or the flat address space if there is no symbol. */ + + /* Required alignment of the memory operation. */ + BrigAlignment8_t m_align; + + /* HSA equiv class, basically an alias set number. */ + uint8_t m_equiv_class; + + /* TODO: Add width modifier, perhaps also other things. */ +protected: + hsa_insn_mem (unsigned nops, int opc, BrigType16_t t, + hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, + hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); + +private: + /* Make the default constructor inaccessible. */ + hsa_insn_mem () : hsa_insn_basic (1, BRIG_OPCODE_LD) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a memory instruction. 
*/ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_mem *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_LD + || p->m_opcode == BRIG_OPCODE_ST); +} + +/* HSA instruction for atomic operations. */ + +class hsa_insn_atomic : public hsa_insn_mem +{ +public: + hsa_insn_atomic (int nops, int opc, enum BrigAtomicOperation aop, + BrigType16_t t, BrigMemoryOrder memorder, + hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, + hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); + void *operator new (size_t); + + /* The operation itself. */ + enum BrigAtomicOperation m_atomicop; + + /* Things like acquire/release/aligned. */ + enum BrigMemoryOrder m_memoryorder; + + /* Scope of the atomic operation. */ + enum BrigMemoryScope m_memoryscope; + +private: + /* Make the default constructor inaccessible. */ + hsa_insn_atomic () : hsa_insn_mem (1, BRIG_KIND_NONE, BRIG_TYPE_NONE) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is an atomic instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_atomic *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_ATOMIC + || p->m_opcode == BRIG_OPCODE_ATOMICNORET); +} + +/* HSA instruction for signal operations. */ + +class hsa_insn_signal : public hsa_insn_atomic +{ +public: + hsa_insn_signal (int nops, int opc, enum BrigAtomicOperation sop, + BrigType16_t t, hsa_op_base *arg0 = NULL, + hsa_op_base *arg1 = NULL, + hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); + + void *operator new (size_t); + +private: + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a signal instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_signal *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_SIGNAL + || p->m_opcode == BRIG_OPCODE_SIGNALNORET); +} + +/* HSA instruction to convert between flat addressing and segments. */ + +class hsa_insn_seg : public hsa_insn_basic +{ +public: + hsa_insn_seg (int opc, BrigType16_t destt, BrigType16_t srct, + BrigSegment8_t seg, hsa_op_base *arg0, hsa_op_base *arg1); + + void *operator new (size_t); + + /* Source type. Depends on the source addressing/segment. */ + BrigType16_t m_src_type; + /* The segment we are converting from or to. */ + BrigSegment8_t m_segment; +private: + /* Make the default constructor inaccessible. */ + hsa_insn_seg () : hsa_insn_basic (1, BRIG_OPCODE_STOF) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a segment conversion instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_seg *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_STOF + || p->m_opcode == BRIG_OPCODE_FTOS); +} + +/* Class for internal functions for purpose of HSA emission. */ + +class hsa_internal_fn +{ +public: + hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size): + m_fn (fn), m_type_bit_size (type_bit_size), m_offset (0) {} + + hsa_internal_fn (const hsa_internal_fn *f): + m_fn (f->m_fn), m_type_bit_size (f->m_type_bit_size), + m_offset (f->m_offset) {} + + /* Return arity of the internal function. */ + unsigned get_arity (); + + /* Return BRIG type of N-th argument, if -1 is passed, return value type + is received. 
*/ + BrigType16_t get_argument_type (int n); + + /* Return function name. The memory must be released by the caller. */ + char *name (); + + /* Internal function. */ + enum internal_fn m_fn; + + /* Bit width of return type. */ + unsigned m_type_bit_size; + + /* BRIG offset of declaration of the function. */ + BrigCodeOffset32_t m_offset; +}; + +/* HSA instruction for function call. */ + +class hsa_insn_call : public hsa_insn_basic +{ +public: + hsa_insn_call (tree callee); + hsa_insn_call (hsa_internal_fn *fn); + + /* Default destructor. */ + ~hsa_insn_call (); + + void *operator new (size_t); + + /* Called function. */ + tree m_called_function; + + /* Called internal function. */ + hsa_internal_fn *m_called_internal_fn; + + /* Input formal arguments. */ + auto_vec <hsa_symbol *> m_input_args; + + /* Input arguments store instructions. */ + auto_vec <hsa_insn_mem *> m_input_arg_insns; + + /* Output argument, can be NULL for void functions. */ + hsa_symbol *m_output_arg; + + /* Called function code reference. */ + hsa_op_code_ref m_func; + + /* Code list for arguments of the function. */ + hsa_op_code_list *m_args_code_list; + + /* Code list for result of the function. */ + hsa_op_code_list *m_result_code_list; +private: + /* Make the default constructor inaccessible. */ + hsa_insn_call () : hsa_insn_basic (0, BRIG_OPCODE_CALL) {} + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a call instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_call *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_CALL); +} + +/* HSA call instruction block encapsulates definition of arguments, + result type, corresponding loads and a possible store. + Moreover, it contains a single call instruction. + Emission of the instruction will produce multiple + HSAIL instructions. */ + +class hsa_insn_arg_block : public hsa_insn_basic +{ +public: + hsa_insn_arg_block (BrigKind brig_kind, hsa_insn_call * call); + + void *operator new (size_t); + + /* Kind of argument block. */ + BrigKind m_kind; + + /* Call instruction. */ + hsa_insn_call *m_call_insn; +private: + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +}; + +/* Report whether or not P is a call block instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_arg_block *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == HSA_OPCODE_ARG_BLOCK); +} + +/* HSA comment instruction. */ + +class hsa_insn_comment: public hsa_insn_basic +{ +public: + /* Constructor of class representing the comment in HSAIL. */ + hsa_insn_comment (const char *s); + + /* Default destructor. */ + ~hsa_insn_comment (); + + void *operator new (size_t); + + char *m_comment; +}; + +/* Report whether or not P is a comment instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_comment *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_KIND_DIRECTIVE_COMMENT); +} + +/* HSA queue instruction. */ + +class hsa_insn_queue: public hsa_insn_basic +{ +public: + hsa_insn_queue (int nops, BrigOpcode opcode); + + /* Destructor. */ + ~hsa_insn_queue (); +}; + +/* Report whether or not P is a queue instruction. 
*/ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_queue *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_ADDQUEUEWRITEINDEX); +} + +/* HSA source type instruction. */ + +class hsa_insn_srctype: public hsa_insn_basic +{ +public: + hsa_insn_srctype (int nops, BrigOpcode opcode, BrigType16_t destt, + BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2); + + /* Pool allocator. */ + void *operator new (size_t); + + /* Source type. */ + BrigType16_t m_source_type; + + /* Destructor. */ + ~hsa_insn_srctype (); +}; + +/* Report whether or not P is a source type instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_srctype *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_POPCOUNT + || p->m_opcode == BRIG_OPCODE_FIRSTBIT + || p->m_opcode == BRIG_OPCODE_LASTBIT); +} + +/* HSA packed instruction. */ + +class hsa_insn_packed : public hsa_insn_srctype +{ +public: + hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt, + BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, + hsa_op_base *arg2); + + /* Pool allocator. */ + void *operator new (size_t); + + /* Operand list for an operand of the instruction. */ + hsa_op_operand_list *m_operand_list; + + /* Destructor. */ + ~hsa_insn_packed (); +}; + +/* Report whether or not P is a combine or expand instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_packed *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_COMBINE + || p->m_opcode == BRIG_OPCODE_EXPAND); +} + +/* HSA convert instruction. */ + +class hsa_insn_cvt: public hsa_insn_basic +{ +public: + hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src); + + /* Pool allocator. */ + void *operator new (size_t); +}; + +/* Report whether or not P is a convert instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_cvt *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_CVT); +} + +/* HSA alloca instruction. */ + +class hsa_insn_alloca: public hsa_insn_basic +{ +public: + hsa_insn_alloca (hsa_op_with_type *dest, hsa_op_with_type *size, + unsigned alignment = 0); + + /* Required alignment of the allocation. */ + BrigAlignment8_t m_align; + + /* Pool allocator. */ + void *operator new (size_t); +}; + +/* Report whether or not P is an alloca instruction. */ + +template <> +template <> +inline bool +is_a_helper <hsa_insn_alloca *>::test (hsa_insn_basic *p) +{ + return (p->m_opcode == BRIG_OPCODE_ALLOCA); +} + +/* Basic block of HSA instructions. */ + +class hsa_bb +{ +public: + hsa_bb (basic_block cfg_bb); + hsa_bb (basic_block cfg_bb, int idx); + ~hsa_bb (); + + /* Append an instruction INSN into the basic block. */ + void append_insn (hsa_insn_basic *insn); + + /* The real CFG BB that this HBB belongs to. */ + basic_block m_bb; + + /* The operand that refers to the label to this BB. */ + hsa_op_code_ref m_label_ref; + + /* The first and last instruction. */ + hsa_insn_basic *m_first_insn, *m_last_insn; + /* The first and last phi node. */ + hsa_insn_phi *m_first_phi, *m_last_phi; + + /* Just a number to construct names from. */ + int m_index; + + bitmap m_liveout, m_livein; +private: + /* Make the default constructor inaccessible. */ + hsa_bb (); + /* All objects are deallocated by destroying their pool, so make delete + inaccessible too. */ + void operator delete (void *) {} +};
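
/* Editorial sketch, not part of the original patch: a hypothetical fragment
   showing how an instruction is appended to an hsa_bb.  */

static inline void
hsa_example_append_nop (hsa_bb *hbb)
{
  /* append_insn links the new insn at the end of the
     m_first_insn/m_last_insn chain of HBB.  */
  hbb->append_insn (new hsa_insn_basic (0, BRIG_OPCODE_NOP));
}


/* Return the corresponding HSA basic block structure for the given control
   flow basic_block BB.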
*/ + +static inline hsa_bb * +hsa_bb_for_bb (basic_block bb) +{ + return (struct hsa_bb *) bb->aux; +} + +/* Class for hashing local hsa_symbols. */ + +struct hsa_noop_symbol_hasher : nofree_ptr_hash <hsa_symbol> +{ + static inline hashval_t hash (const value_type); + static inline bool equal (const value_type, const compare_type); +}; + +/* Hash hsa_symbol. */ + +inline hashval_t +hsa_noop_symbol_hasher::hash (const value_type item) +{ + return DECL_UID (item->m_decl); +} + +/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */ + +inline bool +hsa_noop_symbol_hasher::equal (const value_type a, const compare_type b) +{ + return (DECL_UID (a->m_decl) == DECL_UID (b->m_decl)); +} + +/* Structure that encapsulates intermediate representation of an HSA + function. */ + +class hsa_function_representation +{ +public: + hsa_function_representation (tree fdecl, bool kernel_p, + unsigned ssa_names_count); + hsa_function_representation (hsa_internal_fn *fn); + ~hsa_function_representation (); + + /* Build a shadow register that is used for a kernel dispatch. */ + hsa_op_reg *get_shadow_reg (); + + /* Return true if we are in a function that has a kernel dispatch + shadow register. */ + bool has_shadow_reg_p (); + + /* The entry/exit blocks don't contain incoming code, + but the HSA generator might put code into them, + so we need hsa_bb instances for them. */ + void init_extra_bbs (); + + /* Return linkage of the representation. */ + BrigLinkage8_t get_linkage (); + + /* Create a private symbol of requested TYPE. */ + hsa_symbol *create_hsa_temporary (BrigType16_t type); + + /* Look up or create an HSA pseudo register for a given gimple SSA name. */ + hsa_op_reg *reg_for_gimple_ssa (tree ssa); + + /* Name of the function. */ + char *m_name; + + /* Number of allocated register structures. */ + int m_reg_count; + + /* Input arguments. */ + vec <hsa_symbol *> m_input_args; + + /* Output argument or NULL if there is none. */ + hsa_symbol *m_output_arg; + + /* Hash table of local variable symbols. */ + hash_table <hsa_noop_symbol_hasher> *m_local_symbols; + + /* Hash map for string constants. */ + hash_map <tree, hsa_symbol *> m_string_constants_map; + + /* Vector of pointers to spill symbols. */ + vec <struct hsa_symbol *> m_spill_symbols; + + /* Vector of pointers to global variables and transformed string constants + that are used by the function. */ + vec <struct hsa_symbol *> m_global_symbols; + + /* Private function artificial variables. */ + vec <struct hsa_symbol *> m_private_variables; + + /* Vector of called function declarations. */ + vec <tree> m_called_functions; + + /* Vector of used internal functions. */ + vec <hsa_internal_fn *> m_called_internal_fns; + + /* Number of HBB BBs. */ + int m_hbb_count; + + /* Whether or not we could check and enforce SSA properties. */ + bool m_in_ssa; + + /* True if the function is a kernel function. */ + bool m_kern_p; + + /* True if the function representation is a declaration. */ + bool m_declaration_p; + + /* Function declaration tree. */ + tree m_decl; + + /* Internal function info is used for declarations of internal functions. */ + hsa_internal_fn *m_internal_fn; + + /* Runtime shadow register. */ + hsa_op_reg *m_shadow_reg; + + /* Number of kernel dispatches which take place in the function. */ + unsigned m_kernel_dispatch_count; + + /* If the function representation contains a kernel dispatch, this is the + amount of OMP data memory that needs to be copied before the kernel + dispatch. 
*/ + unsigned m_maximum_omp_data_size; + + /* True if an HSA-specific error has already been seen. */ + bool m_seen_error; + + /* Counter for temporary symbols created in the function representation. */ + unsigned m_temp_symbol_count; + + /* SSA names mapping. */ + vec <hsa_op_reg_p> m_ssa_map; +}; + +enum hsa_function_kind +{ + HSA_NONE, + HSA_KERNEL, + HSA_FUNCTION +}; + +struct hsa_function_summary +{ + /* Default constructor. */ + hsa_function_summary (); + + /* Kind of GPU/host function. */ + hsa_function_kind m_kind; + + /* Pointer to a cgraph node which is an HSA implementation of the function. + If the function is an HSA function, the bound function points to the + host function. */ + cgraph_node *m_binded_function; + + /* Identifies if the function is an HSA function or a host function. */ + bool m_gpu_implementation_p; + + /* True if the function is a gridified kernel. */ + bool m_gridified_kernel_p; +}; + +inline +hsa_function_summary::hsa_function_summary (): m_kind (HSA_NONE), + m_binded_function (NULL), m_gpu_implementation_p (false), + m_gridified_kernel_p (false) +{ +} + +/* Function summary for HSA functions. */ +class hsa_summary_t: public function_summary <hsa_function_summary *> +{ +public: + hsa_summary_t (symbol_table *table): + function_summary<hsa_function_summary *> (table) { } + + /* Couple GPU and HOST as gpu-specific and host-specific implementation of + the same function. KIND determines whether GPU is a host-invokable kernel + or gpu-callable function and GRIDIFIED_KERNEL_P is set if the function was + gridified in OMP. */ + + void link_functions (cgraph_node *gpu, cgraph_node *host, + hsa_function_kind kind, bool gridified_kernel_p); +}; + +/* An OMP simple builtin describes the behavior that should be performed for + the routine. */ +class omp_simple_builtin +{ +public: + omp_simple_builtin (const char *name, const char *warning_message, + bool sorry, hsa_op_immed *return_value = NULL): + m_name (name), m_warning_message (warning_message), m_sorry (sorry), + m_return_value (return_value) + {} + + /* Generate HSAIL instructions for the builtin or produce a warning + message. */ + void generate (gimple *stmt, hsa_bb *hbb); + + /* Name of function. */ + const char *m_name; + + /* Warning message. */ + const char *m_warning_message; + + /* Flag if we should sorry after the warning message is printed. */ + bool m_sorry; + + /* Return value of the function. */ + hsa_op_immed *m_return_value; + + /* Emission function. */ + void (*m_emit_func) (gimple *stmt, hsa_bb *); +}; + +/* Class for hashing hsa_internal_fn. */ + +struct hsa_internal_fn_hasher: free_ptr_hash <hsa_internal_fn> +{ + static inline hashval_t hash (const value_type); + static inline bool equal (const value_type, const compare_type); +}; + +/* Hash hsa_internal_fn. */ + +inline hashval_t +hsa_internal_fn_hasher::hash (const value_type item) +{ + return item->m_fn; +} + +/* Return true if the internal functions and type bit sizes of both entries + are equal. 
*/ + +inline bool +hsa_internal_fn_hasher::equal (const value_type a, const compare_type b) +{ + return a->m_fn == b->m_fn && a->m_type_bit_size == b->m_type_bit_size; +} + +/* in hsa.c */ +extern struct hsa_function_representation *hsa_cfun; +extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; +extern hsa_summary_t *hsa_summaries; +extern hsa_symbol *hsa_num_threads; +extern unsigned hsa_kernel_calls_counter; +extern hash_set <tree> *hsa_failed_functions; +extern hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; + +bool hsa_callable_function_p (tree fndecl); +void hsa_init_compilation_unit_data (void); +void hsa_deinit_compilation_unit_data (void); +bool hsa_machine_large_p (void); +bool hsa_full_profile_p (void); +bool hsa_opcode_floating_bit_insn_p (BrigOpcode16_t); +unsigned hsa_type_bit_size (BrigType16_t t); +BrigType16_t hsa_bittype_for_bitsize (unsigned bitsize); +BrigType16_t hsa_uint_for_bitsize (unsigned bitsize); +BrigType16_t hsa_float_for_bitsize (unsigned bitsize); +BrigType16_t hsa_bittype_for_type (BrigType16_t t); +bool hsa_type_float_p (BrigType16_t type); +bool hsa_type_integer_p (BrigType16_t type); +bool hsa_btype_p (BrigType16_t type); +BrigAlignment8_t hsa_alignment_encoding (unsigned n); +BrigAlignment8_t hsa_natural_alignment (BrigType16_t type); +void hsa_destroy_operand (hsa_op_base *op); +void hsa_destroy_insn (hsa_insn_basic *insn); +void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool); +unsigned hsa_get_number_decl_kernel_mappings (void); +tree hsa_get_decl_kernel_mapping_decl (unsigned i); +char *hsa_get_decl_kernel_mapping_name (unsigned i); +unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i); +bool hsa_get_decl_kernel_mapping_gridified (unsigned i); +void hsa_free_decl_kernel_mapping (void); +void hsa_add_kernel_dependency (tree caller, const char *called_function); +void hsa_sanitize_name (char *p); +char *hsa_brig_function_name (const char *p); +const char *hsa_get_declaration_name (tree decl); +void hsa_register_kernel (cgraph_node *host); +void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host); +bool hsa_seen_error (void); +void hsa_fail_cfun (void); + +/* In hsa-gen.c. */ +void hsa_build_append_simple_mov (hsa_op_reg *, hsa_op_base *, hsa_bb *); +hsa_symbol *hsa_get_spill_symbol (BrigType16_t); +hsa_symbol *hsa_get_string_cst_symbol (BrigType16_t); +hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); +hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); +hsa_bb *hsa_init_new_bb (basic_block); +hsa_function_representation *hsa_generate_function_declaration (tree decl); +hsa_function_representation *hsa_generate_internal_fn_decl (hsa_internal_fn *); +tree hsa_get_host_function (tree decl); + +/* In hsa-regalloc.c. */ +void hsa_regalloc (void); + +/* In hsa-brig.c. */ +extern hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; +void hsa_brig_emit_function (void); +void hsa_output_brig (void); +unsigned hsa_get_imm_brig_type_len (BrigType16_t type); +void hsa_brig_emit_omp_symbols (void); + +/* In hsa-dump.c. 
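
   An aside on the hsa.c type helpers declared above: hsa_bittype_for_bitsize
   and its siblings are presumably plain bit-size to BRIG-type mappings.  A
   minimal sketch under that assumption (example_bittype_for_bitsize is a
   hypothetical name; BRIG_TYPE_B* are the bit-type constants of the BRIG
   format):

     static BrigType16_t
     example_bittype_for_bitsize (unsigned bitsize)
     {
       switch (bitsize)
         {
         case 1: return BRIG_TYPE_B1;
         case 8: return BRIG_TYPE_B8;
         case 16: return BRIG_TYPE_B16;
         case 32: return BRIG_TYPE_B32;
         case 64: return BRIG_TYPE_B64;
         case 128: return BRIG_TYPE_B128;
         default: gcc_unreachable ();
         }
     }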
*/ +const char *hsa_seg_name (BrigSegment8_t); +void dump_hsa_insn (FILE *f, hsa_insn_basic *insn); +void dump_hsa_bb (FILE *, hsa_bb *); +void dump_hsa_cfun (FILE *); +DEBUG_FUNCTION void debug_hsa_operand (hsa_op_base *opc); +DEBUG_FUNCTION void debug_hsa_insn (hsa_insn_basic *insn); + +union hsa_bytes +{ + uint8_t b8; + uint16_t b16; + uint32_t b32; + uint64_t b64; +}; + +/* Return true if a function DECL is an HSA implementation. */ + +static inline bool +hsa_gpu_implementation_p (tree decl) +{ + if (hsa_summaries == NULL) + return false; + + hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); + + return s->m_gpu_implementation_p; +} + +#endif /* HSA_H */ diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c new file mode 100644 index 0000000..769657f --- /dev/null +++ b/gcc/ipa-hsa.c @@ -0,0 +1,331 @@ +/* Interprocedural HSA pass: creation of HSA clones. + Copyright (C) 2015-2016 Free Software Foundation, Inc. + Contributed by Martin Liska <mliska@suse.cz> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* The interprocedural HSA pass is responsible for the creation of HSA + clones. HSAIL instructions are emitted for all these clones, after which + the processing done by this pass is finished. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "is-a.h" +#include "hash-set.h" +#include "vec.h" +#include "tree.h" +#include "tree-pass.h" +#include "function.h" +#include "basic-block.h" +#include "gimple.h" +#include "dumpfile.h" +#include "gimple-pretty-print.h" +#include "tree-streamer.h" +#include "stringpool.h" +#include "cgraph.h" +#include "print-tree.h" +#include "symbol-summary.h" +#include "hsa.h" + +namespace { + +/* If NODE is not versionable, warn about not emitting HSAIL and return false. + Otherwise return true. */ + +static bool +check_warn_node_versionable (cgraph_node *node) +{ + if (!node->local.versionable) + { + warning_at (EXPR_LOCATION (node->decl), OPT_Whsa, + "could not emit HSAIL for function %s: function cannot be " + "cloned", node->name ()); + return false; + } + return true; +} + +/* The function creates HSA clones for all functions that were either + marked as HSA kernels or are callable HSA functions. Apart from that, + we redirect all edges that come from an HSA clone and end in another + HSA clone to connect these two functions. */ + +static unsigned int +process_hsa_functions (void) +{ + struct cgraph_node *node; + + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + + FOR_EACH_DEFINED_FUNCTION (node) + { + hsa_function_summary *s = hsa_summaries->get (node); + + /* A function that is already linked to an HSA counterpart is skipped.
*/ + if (s->m_binded_function != NULL) + continue; + + if (s->m_kind != HSA_NONE) + { + if (!check_warn_node_versionable (node)) + continue; + cgraph_node *clone + = node->create_virtual_clone (vec <cgraph_edge *> (), + NULL, NULL, "hsa"); + TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); + + clone->force_output = true; + hsa_summaries->link_functions (clone, node, s->m_kind, false); + + if (dump_file) + fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n", + clone->name (), + s->m_kind == HSA_KERNEL ? "kernel" : "function"); + } + else if (hsa_callable_function_p (node->decl)) + { + if (!check_warn_node_versionable (node)) + continue; + cgraph_node *clone + = node->create_virtual_clone (vec <cgraph_edge *> (), + NULL, NULL, "hsa"); + TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); + + if (!cgraph_local_p (node)) + clone->force_output = true; + hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false); + + if (dump_file) + fprintf (dump_file, "Created a new HSA function clone: %s\n", + clone->name ()); + } + } + + /* Redirect all edges that are between HSA clones. */ + FOR_EACH_DEFINED_FUNCTION (node) + { + cgraph_edge *e = node->callees; + + while (e) + { + hsa_function_summary *src = hsa_summaries->get (node); + if (src->m_kind != HSA_NONE && src->m_gpu_implementation_p) + { + hsa_function_summary *dst = hsa_summaries->get (e->callee); + if (dst->m_kind != HSA_NONE && !dst->m_gpu_implementation_p) + { + e->redirect_callee (dst->m_binded_function); + if (dump_file) + fprintf (dump_file, + "Redirecting edge to HSA function: %s->%s\n", + xstrdup_for_dump (e->caller->name ()), + xstrdup_for_dump (e->callee->name ())); + } + } + + e = e->next_callee; + } + } + + return 0; +} + +/* Iterate all HSA functions and stream out HSA function summary. */ + +static void +ipa_hsa_write_summary (void) +{ + struct bitpack_d bp; + struct cgraph_node *node; + struct output_block *ob; + unsigned int count = 0; + lto_symtab_encoder_iterator lsei; + lto_symtab_encoder_t encoder; + + if (!hsa_summaries) + return; + + ob = create_output_block (LTO_section_ipa_hsa); + encoder = ob->decl_state->symtab_node_encoder; + ob->symbol = NULL; + for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); + lsei_next_function_in_partition (&lsei)) + { + node = lsei_cgraph_node (lsei); + hsa_function_summary *s = hsa_summaries->get (node); + + if (s->m_kind != HSA_NONE) + count++; + } + + streamer_write_uhwi (ob, count); + + /* Process all of the functions. */ + for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); + lsei_next_function_in_partition (&lsei)) + { + node = lsei_cgraph_node (lsei); + hsa_function_summary *s = hsa_summaries->get (node); + + if (s->m_kind != HSA_NONE) + { + encoder = ob->decl_state->symtab_node_encoder; + int node_ref = lto_symtab_encoder_encode (encoder, node); + streamer_write_uhwi (ob, node_ref); + + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, s->m_kind, 2); + bp_pack_value (&bp, s->m_gpu_implementation_p, 1); + bp_pack_value (&bp, s->m_binded_function != NULL, 1); + streamer_write_bitpack (&bp); + if (s->m_binded_function) + stream_write_tree (ob, s->m_binded_function->decl, true); + } + } + + streamer_write_char_stream (ob->main_stream, 0); + produce_asm (ob, NULL); + destroy_output_block (ob); +} + +/* Read section in file FILE_DATA of length LEN with data DATA. 
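
   The record layout produced by ipa_hsa_write_summary above, and expected
   by this reader, is:

     uhwi   count                       -- number of HSA summaries
     then, per summary:
       uhwi   node_ref                  -- symtab encoder reference
       bitpack:
         2 bits  m_kind                 -- HSA_NONE/HSA_KERNEL/HSA_FUNCTION
         1 bit   m_gpu_implementation_p
         1 bit   nonzero if a binded function decl follows
       tree   m_binded_function->decl   -- only if the bit above is set

   The unpacking below mirrors this order exactly.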
*/ + +static void +ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data, + size_t len) +{ + const struct lto_function_header *header + = (const struct lto_function_header *) data; + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; + struct data_in *data_in; + unsigned int i; + unsigned int count; + + lto_input_block ib_main ((const char *) data + main_offset, + header->main_size, file_data->mode_table); + + data_in + = lto_data_in_create (file_data, (const char *) data + string_offset, + header->string_size, vNULL); + count = streamer_read_uhwi (&ib_main); + + for (i = 0; i < count; i++) + { + unsigned int index; + struct cgraph_node *node; + lto_symtab_encoder_t encoder; + + index = streamer_read_uhwi (&ib_main); + encoder = file_data->symtab_node_encoder; + node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, + index)); + gcc_assert (node->definition); + hsa_function_summary *s = hsa_summaries->get (node); + + struct bitpack_d bp = streamer_read_bitpack (&ib_main); + s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2); + s->m_gpu_implementation_p = bp_unpack_value (&bp, 1); + bool has_tree = bp_unpack_value (&bp, 1); + + if (has_tree) + { + tree decl = stream_read_tree (&ib_main, data_in); + s->m_binded_function = cgraph_node::get_create (decl); + } + } + lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data, + len); + lto_data_in_delete (data_in); +} + +/* Load streamed HSA functions summary and assign the summary to a function. */ + +static void +ipa_hsa_read_summary (void) +{ + struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); + struct lto_file_decl_data *file_data; + unsigned int j = 0; + + if (hsa_summaries == NULL) + hsa_summaries = new hsa_summary_t (symtab); + + while ((file_data = file_data_vec[j++])) + { + size_t len; + const char *data = lto_get_section_data (file_data, LTO_section_ipa_hsa, + NULL, &len); + + if (data) + ipa_hsa_read_section (file_data, data, len); + } +} + +const pass_data pass_data_ipa_hsa = +{ + IPA_PASS, /* type */ + "hsa", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_HSA, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_dump_symtab, /* todo_flags_finish */ +}; + +class pass_ipa_hsa : public ipa_opt_pass_d +{ +public: + pass_ipa_hsa (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt, + NULL, /* generate_summary */ + ipa_hsa_write_summary, /* write_summary */ + ipa_hsa_read_summary, /* read_summary */ + ipa_hsa_write_summary, /* write_optimization_summary */ + ipa_hsa_read_summary, /* read_optimization_summary */ + NULL, /* stmt_fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* function_transform */ + NULL) /* variable_transform */ + {} + + /* opt_pass methods: */ + virtual bool gate (function *); + + virtual unsigned int execute (function *) { return process_hsa_functions (); } + +}; // class pass_ipa_reference + +bool +pass_ipa_hsa::gate (function *) +{ + return hsa_gen_requested_p (); +} + +} // anon namespace + +ipa_opt_pass_d * +make_pass_ipa_hsa (gcc::context *ctxt) +{ + return new pass_ipa_hsa (ctxt); +} diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index 972f062..93b82be 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -51,7 +51,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = 
"ipcp_trans", "icf", "offload_table", - "mode_table" + "mode_table", + "hsa" }; diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 42654f5..0cb200e 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -244,6 +244,7 @@ enum lto_section_type LTO_section_ipa_icf, LTO_section_offload_table, LTO_section_mode_table, + LTO_section_ipa_hsa, LTO_N_SECTION_TYPES /* Must be last. */ }; diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index ecb9996..16d1f45 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -736,6 +736,7 @@ compile_images_for_offload_targets (unsigned in_argc, char *in_argv[], return; unsigned num_targets = parse_env_var (target_names, &names, NULL); + int next_name_entry = 0; const char *compiler_path = getenv ("COMPILER_PATH"); if (!compiler_path) goto out; @@ -745,13 +746,19 @@ compile_images_for_offload_targets (unsigned in_argc, char *in_argv[], offload_names = XCNEWVEC (char *, num_targets + 1); for (unsigned i = 0; i < num_targets; i++) { - offload_names[i] + /* HSA does not use LTO-like streaming and a different compiler, skip + it. */ + if (strcmp (names[i], "hsa") == 0) + continue; + + offload_names[next_name_entry] = compile_offload_image (names[i], compiler_path, in_argc, in_argv, compiler_opts, compiler_opt_count, linker_opts, linker_opt_count); - if (!offload_names[i]) + if (!offload_names[next_name_entry]) fatal_error (input_location, "problem with building target image for %s\n", names[i]); + next_name_entry++; } out: diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog index ae2aba2..430dcfe 100644 --- a/gcc/lto/ChangeLog +++ b/gcc/lto/ChangeLog @@ -1,3 +1,10 @@ +2016-01-19 Martin Liska <mliska@suse.cz> + Martin Jambor <mjambor@suse.cz> + + * lto-partition.c: Include "hsa.h" + (add_symbol_to_partition_1): Put hsa implementations into the + same partition as host implementations. + 2016-01-12 Jan Hubicka <hubicka@ucw.cz> PR lto/69003 diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c index 9eb63c2..eb28fed 100644 --- a/gcc/lto/lto-partition.c +++ b/gcc/lto/lto-partition.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "ipa-inline.h" #include "lto-partition.h" +#include "hsa.h" vec<ltrans_partition> ltrans_partitions; @@ -170,6 +171,24 @@ add_symbol_to_partition_1 (ltrans_partition part, symtab_node *node) Therefore put it into the same partition. */ if (cnode->instrumented_version) add_symbol_to_partition_1 (part, cnode->instrumented_version); + + /* Add an HSA associated with the symbol. */ + if (hsa_summaries != NULL) + { + hsa_function_summary *s = hsa_summaries->get (cnode); + if (s->m_kind == HSA_KERNEL) + { + /* Add binded function. 
*/ + bool added = add_symbol_to_partition_1 (part, + s->m_binded_function); + gcc_assert (added); + if (symtab->dump_file) + fprintf (symtab->dump_file, + "adding an HSA function (host/gpu) to the " + "partition: %s\n", + s->m_binded_function->name ()); + } + } } add_references_to_partition (part, node); diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 5529547..60199b0 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -340,8 +340,13 @@ DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_START, "GOMP_single_copy_start", BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_SINGLE_COPY_END, "GOMP_single_copy_end", BT_FN_VOID_PTR, ATTR_NOTHROW_LEAF_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_REGISTER, "GOMP_offload_register_ver", + BT_FN_VOID_UINT_PTR_INT_PTR, ATTR_NOTHROW_LIST) +DEF_GOMP_BUILTIN (BUILT_IN_GOMP_OFFLOAD_UNREGISTER, + "GOMP_offload_unregister_ver", + BT_FN_VOID_UINT_PTR_INT_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET, "GOMP_target_ext", - BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_INT_INT, + BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR, ATTR_NOTHROW_LIST) DEF_GOMP_BUILTIN (BUILT_IN_GOMP_TARGET_DATA, "GOMP_target_data_ext", BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) diff --git a/gcc/omp-low.c b/gcc/omp-low.c index d7df3db..673dee3 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -80,6 +80,9 @@ along with GCC; see the file COPYING3. If not see #include "lto-section-names.h" #include "gomp-constants.h" #include "gimple-pretty-print.h" +#include "symbol-summary.h" +#include "hsa.h" +#include "params.h" /* Lowering of OMP parallel and workshare constructs proceeds in two phases. The first phase scans the function looking for OMP statements @@ -450,6 +453,63 @@ is_combined_parallel (struct omp_region *region) return region->is_combined_parallel; } +/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or + GT_EXPR. */ + +static void +adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2) +{ + switch (*cond_code) + { + case LT_EXPR: + case GT_EXPR: + case NE_EXPR: + break; + case LE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (*n2))) + *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1); + else + *n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2, + build_int_cst (TREE_TYPE (*n2), 1)); + *cond_code = LT_EXPR; + break; + case GE_EXPR: + if (POINTER_TYPE_P (TREE_TYPE (*n2))) + *n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1); + else + *n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2, + build_int_cst (TREE_TYPE (*n2), 1)); + *cond_code = GT_EXPR; + break; + default: + gcc_unreachable (); + } +} + +/* Return the looping step from INCR, extracted from the step of a gimple omp + for statement. */ + +static tree +get_omp_for_step_from_incr (location_t loc, tree incr) +{ + tree step; + switch (TREE_CODE (incr)) + { + case PLUS_EXPR: + step = TREE_OPERAND (incr, 1); + break; + case POINTER_PLUS_EXPR: + step = fold_convert (ssizetype, TREE_OPERAND (incr, 1)); + break; + case MINUS_EXPR: + step = TREE_OPERAND (incr, 1); + step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step); + break; + default: + gcc_unreachable (); + } + return step; +} /* Extract the header elements of parallel loop FOR_STMT and store them into *FD. 
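
   For illustration, a worked example of the normalization done by the two
   helpers above: a source loop `for (i = 0; i <= n; i++)' arrives here
   with cond_code LE_EXPR; since the bound is not a pointer, it is
   rewritten to n + 1 with cond_code LT_EXPR, i.e. the loop is treated as
   `i < n + 1'.  Similarly, `for (i = m; i >= 0; i--)' becomes `i > -1'
   with GT_EXPR, and get_omp_for_step_from_incr turns the MINUS_EXPR
   increment into the negated step -1.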
*/ @@ -579,58 +639,14 @@ extract_omp_for_data (gomp_for *for_stmt, struct omp_for_data *fd, loop->cond_code = gimple_omp_for_cond (for_stmt, i); loop->n2 = gimple_omp_for_final (for_stmt, i); - switch (loop->cond_code) - { - case LT_EXPR: - case GT_EXPR: - break; - case NE_EXPR: - gcc_assert (gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_CILKSIMD - || (gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_CILKFOR)); - break; - case LE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (loop->n2))) - loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, 1); - else - loop->n2 = fold_build2_loc (loc, - PLUS_EXPR, TREE_TYPE (loop->n2), loop->n2, - build_int_cst (TREE_TYPE (loop->n2), 1)); - loop->cond_code = LT_EXPR; - break; - case GE_EXPR: - if (POINTER_TYPE_P (TREE_TYPE (loop->n2))) - loop->n2 = fold_build_pointer_plus_hwi_loc (loc, loop->n2, -1); - else - loop->n2 = fold_build2_loc (loc, - MINUS_EXPR, TREE_TYPE (loop->n2), loop->n2, - build_int_cst (TREE_TYPE (loop->n2), 1)); - loop->cond_code = GT_EXPR; - break; - default: - gcc_unreachable (); - } + gcc_assert (loop->cond_code != NE_EXPR + || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKSIMD + || gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_CILKFOR); + adjust_for_condition (loc, &loop->cond_code, &loop->n2); t = gimple_omp_for_incr (for_stmt, i); gcc_assert (TREE_OPERAND (t, 0) == var); - switch (TREE_CODE (t)) - { - case PLUS_EXPR: - loop->step = TREE_OPERAND (t, 1); - break; - case POINTER_PLUS_EXPR: - loop->step = fold_convert (ssizetype, TREE_OPERAND (t, 1)); - break; - case MINUS_EXPR: - loop->step = TREE_OPERAND (t, 1); - loop->step = fold_build1_loc (loc, - NEGATE_EXPR, TREE_TYPE (loop->step), - loop->step); - break; - default: - gcc_unreachable (); - } + loop->step = get_omp_for_step_from_incr (loc, t); if (simd || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC @@ -1321,7 +1337,16 @@ build_outer_var_ref (tree var, omp_context *ctx, bool lastprivate = false) } } else if (ctx->outer) - x = lookup_decl (var, ctx->outer); + { + omp_context *outer = ctx->outer; + if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY) + { + outer = outer->outer; + gcc_assert (outer + && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY); + } + x = lookup_decl (var, outer); + } else if (is_reference (var)) /* This can happen with orphaned constructs. If var is reference, it is possible it is shared and as such valid. */ @@ -1774,6 +1799,8 @@ fixup_child_record_type (omp_context *ctx) { tree f, type = ctx->record_type; + if (!ctx->receiver_decl) + return; /* ??? It isn't sufficient to just call remap_type here, because variably_modified_type_p doesn't work the way we expect for record types. 
Testing each field for whether it needs remapping @@ -2132,6 +2159,14 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, } break; + case OMP_CLAUSE__GRIDDIM_: + if (ctx->outer) + { + scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer); + scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer); + } + break; + case OMP_CLAUSE_NOWAIT: case OMP_CLAUSE_ORDERED: case OMP_CLAUSE_COLLAPSE: @@ -2327,6 +2362,7 @@ scan_sharing_clauses (tree clauses, omp_context *ctx, case OMP_CLAUSE_INDEPENDENT: case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: + case OMP_CLAUSE__GRIDDIM_: break; case OMP_CLAUSE_DEVICE_RESIDENT: @@ -2648,8 +2684,11 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx) DECL_NAMELESS (name) = 1; TYPE_NAME (ctx->record_type) = name; TYPE_ARTIFICIAL (ctx->record_type) = 1; - create_omp_child_function (ctx, false); - gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); + if (!gimple_omp_parallel_grid_phony (stmt)) + { + create_omp_child_function (ctx, false); + gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); + } scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); @@ -3189,6 +3228,11 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) { tree c; + if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY) + /* GRID_BODY is an artificial construct, nesting rules will be checked in + the original copy of its contents. */ + return true; + /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin) inside an OpenACC CTX. */ if (!(is_gimple_omp (stmt) @@ -3777,7 +3821,11 @@ scan_omp_1_op (tree *tp, int *walk_subtrees, void *data) case LABEL_DECL: case RESULT_DECL: if (ctx) - *tp = remap_decl (t, &ctx->cb); + { + tree repl = remap_decl (t, &ctx->cb); + gcc_checking_assert (TREE_CODE (repl) != ERROR_MARK); + *tp = repl; + } break; default: @@ -3911,6 +3959,7 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, case GIMPLE_OMP_TASKGROUP: case GIMPLE_OMP_ORDERED: case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_GRID_BODY: ctx = new_omp_context (stmt, ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); break; @@ -6343,6 +6392,37 @@ gimple_build_cond_empty (tree cond) return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); } +/* Return true if a parallel REGION is within a declare target function or + within a target region and is not a part of a gridified target. 
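
   For instance (an illustrative sketch):

     #pragma omp target            // not gridified
     #pragma omp parallel          // child function must become an HSA
     for (...) ...                 // kernel so it can run on the device

   In a gridified target, on the other hand, the outermost parallel is
   already part of the grid kernel, and only a parallel nested in another
   parallel, reached via dynamic parallelism, needs a kernel of its own;
   that is what the INDIRECT flag below tracks.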
*/ + +static bool +parallel_needs_hsa_kernel_p (struct omp_region *region) +{ + bool indirect = false; + for (region = region->outer; region; region = region->outer) + { + if (region->type == GIMPLE_OMP_PARALLEL) + indirect = true; + else if (region->type == GIMPLE_OMP_TARGET) + { + gomp_target *tgt_stmt + = as_a <gomp_target *> (last_stmt (region->entry)); + + if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + return indirect; + else + return true; + } + } + + if (lookup_attribute ("omp declare target", + DECL_ATTRIBUTES (current_function_decl))) + return true; + + return false; +} + +static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, bool = false); @@ -6512,7 +6592,8 @@ expand_parallel_call (struct omp_region *region, basic_block bb, t1 = null_pointer_node; else t1 = build_fold_addr_expr (t); - t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt)); + tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); + t2 = build_fold_addr_expr (child_fndecl); vec_alloc (args, 4 + vec_safe_length (ws_args)); args->quick_push (t2); @@ -6527,6 +6608,13 @@ expand_parallel_call (struct omp_region *region, basic_block bb, force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); + + if (hsa_gen_requested_p () + && parallel_needs_hsa_kernel_p (region)) + { + cgraph_node *child_cnode = cgraph_node::get (child_fndecl); + hsa_register_kernel (child_cnode); + } } /* Insert a function call whose name is FUNC_NAME with the information from @@ -12570,6 +12658,236 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry, loop->in_oacc_kernels_region = true; } +/* Types used to pass grid and work-group sizes to a kernel invocation. */ + +struct GTY(()) grid_launch_attributes_trees +{ + tree kernel_dim_array_type; + tree kernel_lattrs_dimnum_decl; + tree kernel_lattrs_grid_decl; + tree kernel_lattrs_group_decl; + tree kernel_launch_attributes_type; +}; + +static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees; + +/* Create types used to pass kernel launch attributes to the target.
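
   The record built by the code below corresponds to this C layout (a
   sketch for orientation only; the authoritative definition is the
   matching structure on the libgomp side):

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;           // number of dimensions actually used, 1 to 3
       uint32_t grid_size[3];   // grid sizes, in work items
       uint32_t group_size[3];  // work-group sizes
     };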
*/ + +static void +grid_create_kernel_launch_attr_types (void) +{ + if (grid_attr_trees) + return; + grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); + + tree dim_arr_index_type + = build_index_type (build_int_cst (integer_type_node, 2)); + grid_attr_trees->kernel_dim_array_type + = build_array_type (uint32_type_node, dim_arr_index_type); + + grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); + grid_attr_trees->kernel_lattrs_dimnum_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), + uint32_type_node); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; + + grid_attr_trees->kernel_lattrs_grid_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), + grid_attr_trees->kernel_dim_array_type); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) + = grid_attr_trees->kernel_lattrs_dimnum_decl; + grid_attr_trees->kernel_lattrs_group_decl + = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), + grid_attr_trees->kernel_dim_array_type); + DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) + = grid_attr_trees->kernel_lattrs_grid_decl; + finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, + "__gomp_kernel_launch_attributes", + grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); +} + +/* Insert before the current statement in GSI a store of VALUE to INDEX of + array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be + of type uint32_type_node. */ + +static void +grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, + tree fld_decl, int index, tree value) +{ + tree ref = build4 (ARRAY_REF, uint32_type_node, + build3 (COMPONENT_REF, + grid_attr_trees->kernel_dim_array_type, + range_var, fld_decl, NULL_TREE), + build_int_cst (integer_type_node, index), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); +} + +/* Return a tree representation of a pointer to a structure with grid and + work-group size information. Statements filling that information will be + inserted before GSI, TGT_STMT is the target statement which has the + necessary information in it. */ + +static tree +grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, + gomp_target *tgt_stmt) +{ + grid_create_kernel_launch_attr_types (); + tree u32_one = build_one_cst (uint32_type_node); + tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, + "__kernel_launch_attrs"); + + unsigned max_dim = 0; + for (tree clause = gimple_omp_target_clauses (tgt_stmt); + clause; + clause = OMP_CLAUSE_CHAIN (clause)) + { + if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) + continue; + + unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); + max_dim = MAX (dim, max_dim); + + grid_insert_store_range_dim (gsi, lattrs, + grid_attr_trees->kernel_lattrs_grid_decl, + dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); + grid_insert_store_range_dim (gsi, lattrs, + grid_attr_trees->kernel_lattrs_group_decl, + dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); + } + + tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, + grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); + /* At this moment we cannot gridify a loop with a collapse clause. */ + /* TODO: Adjust when we support bigger collapse. 
*/ + gcc_assert (max_dim == 0); + gsi_insert_before (gsi, gimple_build_assign (dimref, u32_one), GSI_SAME_STMT); + TREE_ADDRESSABLE (lattrs) = 1; + return build_fold_addr_expr (lattrs); +} + +/* Build target argument identifier from the DEVICE identifier, value + identifier ID and whether the element also has a SUBSEQUENT_PARAM. */ + +static tree +get_target_argument_identifier_1 (int device, bool subseqent_param, int id) +{ + tree t = build_int_cst (integer_type_node, device); + if (subseqent_param) + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, + GOMP_TARGET_ARG_SUBSEQUENT_PARAM)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + build_int_cst (integer_type_node, id)); + return t; +} + +/* Like above but return it in type that can be directly stored as an element + of the argument array. */ + +static tree +get_target_argument_identifier (int device, bool subseqent_param, int id) +{ + tree t = get_target_argument_identifier_1 (device, subseqent_param, id); + return fold_convert (ptr_type_node, t); +} + +/* Return a target argument consisting of DEVICE identifier, value identifier + ID, and the actual VALUE. */ + +static tree +get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id, + tree value) +{ + tree t = fold_build2 (LSHIFT_EXPR, integer_type_node, + fold_convert (integer_type_node, value), + build_int_cst (unsigned_type_node, + GOMP_TARGET_ARG_VALUE_SHIFT)); + t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t, + get_target_argument_identifier_1 (device, false, id)); + t = fold_convert (ptr_type_node, t); + return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT); +} + +/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15, + push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it, + otherwise push an identifier (with DEVICE and ID) and the VALUE in two + arguments. */ + +static void +push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device, + int id, tree value, vec <tree> *args) +{ + if (tree_fits_shwi_p (value) + && tree_to_shwi (value) > -(1 << 15) + && tree_to_shwi (value) < (1 << 15)) + args->quick_push (get_target_argument_value (gsi, device, id, value)); + else + { + args->quick_push (get_target_argument_identifier (device, true, id)); + value = fold_convert (ptr_type_node, value); + value = force_gimple_operand_gsi (gsi, value, true, NULL, true, + GSI_SAME_STMT); + args->quick_push (value); + } +} + +/* Create an array of arguments that is then passed to GOMP_target. */ + +static tree +get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) +{ + auto_vec <tree, 6> args; + tree clauses = gimple_omp_target_clauses (tgt_stmt); + tree t, c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); + if (c) + t = OMP_CLAUSE_NUM_TEAMS_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_NUM_TEAMS, t, &args); + + c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); + if (c) + t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c); + else + t = integer_minus_one_node; + push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL, + GOMP_TARGET_ARG_THREAD_LIMIT, t, + &args); + + /* Add HSA-specific grid sizes, if available. 
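
   For orientation, an example of the encoding implemented by the helpers
   above, using the GOMP_TARGET_ARG_* macros from gomp-constants.h: a
   compile-time num_teams value of 4 fits into the (-2^15, 2^15) window
   checked by push_target_argument_according_to_value and is therefore
   passed as a single pointer-sized element

     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS

   whereas a value only known at run time is passed as two elements: an
   identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the
   value itself.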
*/ + if (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)) + { + t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, + GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES); + args.quick_push (t); + args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); + } + + /* Produce more, perhaps device specific, arguments here. */ + + tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, + args.length () + 1), + ".omp_target_args"); + for (unsigned i = 0; i < args.length (); i++) + { + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, i), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, args[i]), + GSI_SAME_STMT); + } + tree ref = build4 (ARRAY_REF, ptr_type_node, argarray, + build_int_cst (integer_type_node, args.length ()), + NULL_TREE, NULL_TREE); + gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node), + GSI_SAME_STMT); + TREE_ADDRESSABLE (argarray) = 1; + return build_fold_addr_expr (argarray); +} + /* Expand the GIMPLE_OMP_TARGET starting at REGION. */ static void @@ -12982,30 +13300,7 @@ expand_omp_target (struct omp_region *region) depend = build_int_cst (ptr_type_node, 0); args.quick_push (depend); if (start_ix == BUILT_IN_GOMP_TARGET) - { - c = find_omp_clause (clauses, OMP_CLAUSE_NUM_TEAMS); - if (c) - { - t = fold_convert (integer_type_node, - OMP_CLAUSE_NUM_TEAMS_EXPR (c)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL, - true, GSI_SAME_STMT); - } - else - t = integer_minus_one_node; - args.quick_push (t); - c = find_omp_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); - if (c) - { - t = fold_convert (integer_type_node, - OMP_CLAUSE_THREAD_LIMIT_EXPR (c)); - t = force_gimple_operand_gsi (&gsi, t, true, NULL, - true, GSI_SAME_STMT); - } - else - t = integer_minus_one_node; - args.quick_push (t); - } + args.quick_push (get_target_arguments (&gsi, entry_stmt)); break; case BUILT_IN_GOACC_PARALLEL: { @@ -13109,6 +13404,257 @@ expand_omp_target (struct omp_region *region) } } +/* Expand KFOR loop as a GPGPU kernel, i.e. as a body only with iteration + variable derived from the thread number. 
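
   I.e., a sketch of the intended result (not literal generated code): a
   gridified loop

     for (i = n1; i < n2; i += step)
       body;

   is reduced to

     i = n1 + (itype) omp_get_thread_num () * step;
     body;

   with the back edge, the GIMPLE_OMP_CONTINUE and the GIMPLE_OMP_RETURN
   removed; every work item of the HSA grid executes exactly one
   iteration.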
*/ + +static void +grid_expand_omp_for_loop (struct omp_region *kfor) +{ + tree t, threadid; + tree type, itype; + gimple_stmt_iterator gsi; + tree n1, step; + struct omp_for_data fd; + + gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); + gcc_checking_assert (gimple_omp_for_kind (for_stmt) + == GF_OMP_FOR_KIND_GRID_LOOP); + basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; + + gcc_assert (gimple_omp_for_collapse (for_stmt) == 1); + gcc_assert (kfor->cont); + extract_omp_for_data (for_stmt, &fd, NULL); + + itype = type = TREE_TYPE (fd.loop.v); + if (POINTER_TYPE_P (type)) + itype = signed_type_for (type); + + gsi = gsi_start_bb (body_bb); + + n1 = fd.loop.n1; + step = fd.loop.step; + n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), + true, NULL_TREE, true, GSI_SAME_STMT); + step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), + true, NULL_TREE, true, GSI_SAME_STMT); + threadid = build_call_expr (builtin_decl_explicit + (BUILT_IN_OMP_GET_THREAD_NUM), 0); + threadid = fold_convert (itype, threadid); + threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, + true, GSI_SAME_STMT); + + tree startvar = fd.loop.v; + t = fold_build2 (MULT_EXPR, itype, threadid, step); + if (POINTER_TYPE_P (type)) + t = fold_build_pointer_plus (n1, t); + else + t = fold_build2 (PLUS_EXPR, type, t, n1); + t = fold_convert (type, t); + t = force_gimple_operand_gsi (&gsi, t, + DECL_P (startvar) + && TREE_ADDRESSABLE (startvar), + NULL_TREE, true, GSI_SAME_STMT); + gassign *assign_stmt = gimple_build_assign (startvar, t); + gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); + + /* Remove the omp for statement */ + gsi = gsi_last_bb (kfor->entry); + gsi_remove (&gsi, true); + + /* Remove the GIMPLE_OMP_CONTINUE statement. */ + gsi = gsi_last_bb (kfor->cont); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); + gsi_remove (&gsi, true); + + /* Replace the GIMPLE_OMP_RETURN with a real return. */ + gsi = gsi_last_bb (kfor->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gsi_remove (&gsi, true); + + /* Fixup the much simpler CFG. */ + remove_edge (find_edge (kfor->cont, body_bb)); + + if (kfor->cont != body_bb) + set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); + set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); +} + +/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap + argument_decls. */ + +struct grid_arg_decl_map +{ + tree old_arg; + tree new_arg; +}; + +/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones + pertaining to kernel function. */ + +static tree +grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) +{ + struct walk_stmt_info *wi = (struct walk_stmt_info *) data; + struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; + tree t = *tp; + + if (t == adm->old_arg) + *tp = adm->new_arg; + *walk_subtrees = !TYPE_P (t) && !DECL_P (t); + return NULL_TREE; +} + +static void expand_omp (struct omp_region *region); + +/* If TARGET region contains a kernel body for loop, remove its region from the + TARGET and expand it in GPGPU kernel fashion. 
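
   Schematically, the region tree is transformed as follows (an informal
   picture):

     target                        target     // plus a new kernel function
       grid_body           -->               // holding the loop body, one
         for (grid loop)                     // iteration per work item
           body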
*/ + +static void +grid_expand_target_grid_body (struct omp_region *target) +{ + if (!hsa_gen_requested_p ()) + return; + + gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); + struct omp_region **pp; + + for (pp = &target->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_GRID_BODY) + break; + + struct omp_region *gpukernel = *pp; + + tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); + if (!gpukernel) + { + /* HSA cannot handle OACC stuff. */ + if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) + return; + gcc_checking_assert (orig_child_fndecl); + gcc_assert (!find_omp_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + cgraph_node *n = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (n); + return; + } + + gcc_assert (find_omp_clause (gimple_omp_target_clauses (tgt_stmt), + OMP_CLAUSE__GRIDDIM_)); + tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry))); + *pp = gpukernel->next; + for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) + if ((*pp)->type == GIMPLE_OMP_FOR) + break; + + struct omp_region *kfor = *pp; + gcc_assert (kfor); + gcc_assert (gimple_omp_for_kind (last_stmt ((kfor)->entry)) + == GF_OMP_FOR_KIND_GRID_LOOP); + *pp = kfor->next; + if (kfor->inner) + expand_omp (kfor->inner); + if (gpukernel->inner) + expand_omp (gpukernel->inner); + + tree kern_fndecl = copy_node (orig_child_fndecl); + DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel"); + SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); + tree tgtblock = gimple_block (tgt_stmt); + tree fniniblock = make_node (BLOCK); + BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock; + BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); + BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); + DECL_INITIAL (kern_fndecl) = fniniblock; + push_struct_function (kern_fndecl); + cfun->function_end_locus = gimple_location (tgt_stmt); + pop_cfun (); + + tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); + gcc_assert (!DECL_CHAIN (old_parm_decl)); + tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); + DECL_CONTEXT (new_parm_decl) = kern_fndecl; + DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; + struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); + kern_cfun->curr_properties = cfun->curr_properties; + + remove_edge (BRANCH_EDGE (kfor->entry)); + grid_expand_omp_for_loop (kfor); + + /* Remove the omp for statement */ + gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry); + gsi_remove (&gsi, true); + /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real + return. */ + gsi = gsi_last_bb (gpukernel->exit); + gcc_assert (!gsi_end_p (gsi) + && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); + gimple *ret_stmt = gimple_build_return (NULL); + gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + + /* Statements in the first BB in the target construct have been produced by + target lowering and must be copied inside the GPUKERNEL, with the two + exceptions of the first OMP statement and the OMP_DATA assignment + statement. */ + gsi = gsi_start_bb (single_succ (gpukernel->entry)); + tree data_arg = gimple_omp_target_data_arg (tgt_stmt); + tree sender = data_arg ? 
TREE_VEC_ELT (data_arg, 0) : NULL; + for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); + !gsi_end_p (tsi); gsi_next (&tsi)) + { + gimple *stmt = gsi_stmt (tsi); + if (is_gimple_omp (stmt)) + break; + if (sender + && is_gimple_assign (stmt) + && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR + && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) + continue; + gimple *copy = gimple_copy (stmt); + gsi_insert_before (&gsi, copy, GSI_SAME_STMT); + gimple_set_block (copy, fniniblock); + } + + move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), + gpukernel->exit, inside_block); + + cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); + kcn->mark_force_output (); + cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); + + hsa_register_kernel (kcn, orig_child); + + cgraph_node::add_new_function (kern_fndecl, true); + push_cfun (kern_cfun); + cgraph_edge::rebuild_edges (); + + /* Re-map any mention of the PARM_DECL of the original function to the + PARM_DECL of the new one. + + TODO: It would be great if lowering produced references into the GPU + kernel decl straight away and we did not have to do this. */ + struct grid_arg_decl_map adm; + adm.old_arg = old_parm_decl; + adm.new_arg = new_parm_decl; + basic_block bb; + FOR_EACH_BB_FN (bb, kern_cfun) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + wi.info = &adm; + walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); + } + } + pop_cfun (); + + return; +} /* Expand the parallel region tree rooted at REGION. Expansion proceeds in depth-first order. Innermost regions are expanded @@ -13129,6 +13675,8 @@ expand_omp (struct omp_region *region) region. */ if (region->type == GIMPLE_OMP_PARALLEL) determine_parallel_type (region); + else if (region->type == GIMPLE_OMP_TARGET) + grid_expand_target_grid_body (region); if (region->type == GIMPLE_OMP_FOR && gimple_omp_for_combined_p (last_stmt (region->entry))) @@ -14507,11 +15055,13 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) ctx); } - gimple_seq_add_stmt (&body, stmt); + if (!gimple_omp_for_grid_phony (stmt)) + gimple_seq_add_stmt (&body, stmt); gimple_seq_add_seq (&body, gimple_omp_body (stmt)); - gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, - fd.loop.v)); + if (!gimple_omp_for_grid_phony (stmt)) + gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, + fd.loop.v)); /* After the loop, add exit clauses. */ lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, ctx); @@ -14523,9 +15073,12 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) body = maybe_catch_exception (body); - /* Region exit marker goes at the end of the loop body. */ - gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait)); - maybe_add_implicit_barrier_cancel (ctx, &body); + if (!gimple_omp_for_grid_phony (stmt)) + { + /* Region exit marker goes at the end of the loop body. */ + gimple_seq_add_stmt (&body, gimple_build_omp_return (fd.have_nowait)); + maybe_add_implicit_barrier_cancel (ctx, &body); + } /* Add OpenACC joining and reduction markers just after the loop. 
*/ if (oacc_tail) @@ -14968,6 +15521,14 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) par_olist = NULL; par_ilist = NULL; par_rlist = NULL; + bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL + && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt)); + if (phony_construct && ctx->record_type) + { + gcc_checking_assert (!ctx->receiver_decl); + ctx->receiver_decl = create_tmp_var + (build_reference_type (ctx->record_type), ".omp_rec"); + } lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL); lower_omp (&par_body, ctx); if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL) @@ -15026,13 +15587,19 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_stmt (&new_body, gimple_build_omp_continue (integer_zero_node, integer_zero_node)); - gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); - gimple_omp_set_body (stmt, new_body); + if (!phony_construct) + { + gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); + gimple_omp_set_body (stmt, new_body); + } bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind)); gsi_replace (gsi_p, dep_bind ? dep_bind : bind, true); gimple_bind_add_seq (bind, ilist); - gimple_bind_add_stmt (bind, stmt); + if (!phony_construct) + gimple_bind_add_stmt (bind, stmt); + else + gimple_bind_add_seq (bind, new_body); gimple_bind_add_seq (bind, olist); pop_gimplify_context (NULL); @@ -16165,19 +16732,22 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) &bind_body, &dlist, ctx, NULL); lower_omp (gimple_omp_body_ptr (teams_stmt), ctx); lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, ctx); - gimple_seq_add_stmt (&bind_body, teams_stmt); - - location_t loc = gimple_location (teams_stmt); - tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS); - gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit); - gimple_set_location (call, loc); - gimple_seq_add_stmt (&bind_body, call); + if (!gimple_omp_teams_grid_phony (teams_stmt)) + { + gimple_seq_add_stmt (&bind_body, teams_stmt); + location_t loc = gimple_location (teams_stmt); + tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS); + gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit); + gimple_set_location (call, loc); + gimple_seq_add_stmt (&bind_body, call); + } gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt)); gimple_omp_set_body (teams_stmt, NULL); gimple_seq_add_seq (&bind_body, olist); gimple_seq_add_seq (&bind_body, dlist); - gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true)); + if (!gimple_omp_teams_grid_phony (teams_stmt)) + gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true)); gimple_bind_set_body (bind, bind_body); pop_gimplify_context (bind); @@ -16188,6 +16758,17 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) TREE_USED (block) = 1; } +/* Expand code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. */ + +static void +lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx) +{ + gimple *stmt = gsi_stmt (*gsi_p); + lower_omp (gimple_omp_body_ptr (stmt), ctx); + gimple_seq_add_stmt (gimple_omp_body_ptr (stmt), + gimple_build_omp_return (false)); +} + /* Callback for lower_omp_1. Return non-NULL if *tp needs to be regimplified. 
If DATA is non-NULL, lower_omp_1 is outside @@ -16399,6 +16980,11 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx) gcc_assert (ctx); lower_omp_teams (gsi_p, ctx); break; + case GIMPLE_OMP_GRID_BODY: + ctx = maybe_lookup_ctx (stmt); + gcc_assert (ctx); + lower_omp_grid_body (gsi_p, ctx); + break; case GIMPLE_CALL: tree fndecl; call_stmt = as_a <gcall *> (stmt); @@ -16488,7 +17074,682 @@ lower_omp (gimple_seq *body, omp_context *ctx) fold_stmt (&gsi); input_location = saved_location; } + +/* Returen true if STMT is an assignment of a register-type into a local + VAR_DECL. */ + +static bool +grid_reg_assignment_to_local_var_p (gimple *stmt) +{ + gassign *assign = dyn_cast <gassign *> (stmt); + if (!assign) + return false; + tree lhs = gimple_assign_lhs (assign); + if (TREE_CODE (lhs) != VAR_DECL + || !is_gimple_reg_type (TREE_TYPE (lhs)) + || is_global_var (lhs)) + return false; + return true; +} + +/* Return true if all statements in SEQ are assignments to local register-type + variables. */ + +static bool +grid_seq_only_contains_local_assignments (gimple_seq seq) +{ + if (!seq) + return true; + + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!grid_reg_assignment_to_local_var_p (gsi_stmt (gsi))) + return false; + return true; +} + +/* Scan statements in SEQ and call itself recursively on any bind. If during + whole search only assignments to register-type local variables and one + single OMP statement is encountered, return true, otherwise return false. + RET is where we store any OMP statement encountered. TARGET_LOC and NAME + are used for dumping a note about a failure. */ + +static bool +grid_find_single_omp_among_assignments_1 (gimple_seq seq, location_t target_loc, + const char *name, gimple **ret) +{ + gimple_stmt_iterator gsi; + for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (grid_reg_assignment_to_local_var_p (stmt)) + continue; + if (gbind *bind = dyn_cast <gbind *> (stmt)) + { + if (!grid_find_single_omp_among_assignments_1 (gimple_bind_body (bind), + target_loc, name, ret)) + return false; + } + else if (is_gimple_omp (stmt)) + { + if (*ret) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, target_loc, + "Will not turn target construct into a simple " + "GPGPU kernel because %s construct contains " + "multiple OpenMP constructs\n", name); + return false; + } + *ret = stmt; + } + else + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, target_loc, + "Will not turn target construct into a simple " + "GPGPU kernel because %s construct contains " + "a complex statement\n", name); + return false; + } + } + return true; +} + +/* Scan statements in SEQ and make sure that it and any binds in it contain + only assignments to local register-type variables and one OMP construct. If + so, return that construct, otherwise return NULL. If dumping is enabled and + function fails, use TARGET_LOC and NAME to dump a note with the reason for + failure. 
*/ + +static gimple * +grid_find_single_omp_among_assignments (gimple_seq seq, location_t target_loc, + const char *name) +{ + if (!seq) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, target_loc, + "Will not turn target construct into a simple " + "GPGPU kernel because %s construct has empty " + "body\n", + name); + return NULL; + } + + gimple *ret = NULL; + if (grid_find_single_omp_among_assignments_1 (seq, target_loc, name, &ret)) + { + if (!ret && dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, target_loc, + "Will not turn target construct into a simple " + "GPGPU kernel because %s construct does not contain" + "any other OpenMP construct\n", name); + return ret; + } + else + return NULL; +} + +/* Walker function looking for statements there is no point gridifying (and for + noreturn function calls which we cannot do). Return non-NULL if such a + function is found. */ + +static tree +grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, + bool *handled_ops_p, + struct walk_stmt_info *) +{ + *handled_ops_p = false; + gimple *stmt = gsi_stmt (*gsi); + switch (gimple_code (stmt)) + { + case GIMPLE_CALL: + if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) + { + *handled_ops_p = true; + return error_mark_node; + } + break; + + /* We may reduce the following list if we find a way to implement the + clauses, but now there is no point trying further. */ + case GIMPLE_OMP_CRITICAL: + case GIMPLE_OMP_TASKGROUP: + case GIMPLE_OMP_TASK: + case GIMPLE_OMP_SECTION: + case GIMPLE_OMP_SECTIONS: + case GIMPLE_OMP_SECTIONS_SWITCH: + case GIMPLE_OMP_TARGET: + case GIMPLE_OMP_ORDERED: + *handled_ops_p = true; + return error_mark_node; + + default: + break; + } + return NULL; +} + + +/* If TARGET follows a pattern that can be turned into a gridified GPGPU + kernel, return true, otherwise return false. In the case of success, also + fill in GROUP_SIZE_P with the requested group size or NULL if there is + none. 
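
   The shape being matched is, for illustration (modulo leading
   assignments to local register-type variables at each level):

     #pragma omp target
     #pragma omp teams thread_limit (64)
     #pragma omp distribute parallel for
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   where the thread_limit expression, or the distribute schedule chunk
   size if both are given and agree, becomes the group size returned in
   GROUP_SIZE_P.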
*/ + +static bool +grid_target_follows_gridifiable_pattern (gomp_target *target, tree *group_size_p) +{ + if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) + return false; + + location_t tloc = gimple_location (target); + gimple *stmt + = grid_find_single_omp_among_assignments (gimple_omp_body (target), + tloc, "target"); + if (!stmt) + return false; + gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); + tree group_size = NULL; + if (!teams) + { + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a simple " + "GPGPU kernel because it does not have a sole teams " + "construct in it.\n"); + return false; + } + + tree clauses = gimple_omp_teams_clauses (teams); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_TEAMS: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because we cannot " + "handle num_teams clause of teams " + "construct\n "); + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a reduction " + "clause is present\n "); + return false; + + case OMP_CLAUSE_LASTPRIVATE: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a lastprivate " + "clause is present\n "); + return false; + + case OMP_CLAUSE_THREAD_LIMIT: + group_size = OMP_CLAUSE_OPERAND (clauses, 0); + break; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), tloc, + "teams"); + if (!stmt) + return false; + gomp_for *dist = dyn_cast <gomp_for *> (stmt); + if (!dist) + { + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a simple " + "GPGPU kernel because the teams construct does not have " + "a sole distribute construct in it.\n"); + return false; + } + + gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); + if (!gimple_omp_for_combined_p (dist)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified GPGPU " + "kernel because we cannot handle a standalone " + "distribute construct\n "); + return false; + } + if (dist->collapse > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified GPGPU " + "kernel because the distribute construct contains " + "collapse clause\n"); + return false; + } + struct omp_for_data fd; + extract_omp_for_data (dist, &fd, NULL); + if (fd.chunk_size) + { + if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because the teams " + "thread limit is different from distribute " + "schedule chunk\n"); + return false; + } + group_size = fd.chunk_size; + } + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), tloc, + "distribute"); + gomp_parallel *par; + if (!stmt || !(par = dyn_cast <gomp_parallel *> (stmt))) + return false; + + clauses = gimple_omp_parallel_clauses (par); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_NUM_THREADS: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified" + "GPGPU kernel because there is a num_threads " 
+ "clause of the parallel construct\n"); + return false; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a reduction " + "clause is present\n "); + return false; + + case OMP_CLAUSE_LASTPRIVATE: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a lastprivate " + "clause is present\n "); + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + + stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), tloc, + "parallel"); + gomp_for *gfor; + if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) + return false; + + if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified GPGPU " + "kernel because the inner loop is not a simple for " + "loop\n"); + return false; + } + if (gfor->collapse > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified GPGPU " + "kernel because the inner loop contains collapse " + "clause\n"); + return false; + } + + if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified GPGPU " + "kernel because the inner loop pre_body contains" + "a complex instruction\n"); + return false; + } + + clauses = gimple_omp_for_clauses (gfor); + while (clauses) + { + switch (OMP_CLAUSE_CODE (clauses)) + { + case OMP_CLAUSE_SCHEDULE: + if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because the inner " + "loop has a non-automatic scheduling clause\n"); + return false; + } + break; + + case OMP_CLAUSE_REDUCTION: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a reduction " + "clause is present\n "); + return false; + + case OMP_CLAUSE_LASTPRIVATE: + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a " + "gridified GPGPU kernel because a lastprivate " + "clause is present\n "); + return false; + + default: + break; + } + clauses = OMP_CLAUSE_CHAIN (clauses); + } + + struct walk_stmt_info wi; + memset (&wi, 0, sizeof (wi)); + if (gimple *bad = walk_gimple_seq (gimple_omp_body (gfor), + grid_find_ungridifiable_statement, + NULL, &wi)) + { + if (dump_enabled_p ()) + { + if (is_gimple_call (bad)) + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified " + " GPGPU kernel because the inner loop contains " + "call to a noreturn function\n"); + else + dump_printf_loc (MSG_NOTE, tloc, + "Will not turn target construct into a gridified " + "GPGPU kernel because the inner loop contains " + "statement %s which cannot be transformed\n", + gimple_code_name[(int) gimple_code (bad)]); + } + return false; + } + + *group_size_p = group_size; + return true; +} + +/* Operand walker, used to remap pre-body declarations according to a hash map + provided in DATA. 
+
+/* Operand walker, used to remap pre-body declarations according to a hash
+   map provided in DATA.  */
+
+static tree
+grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
+{
+  tree t = *tp;
+
+  if (DECL_P (t) || TYPE_P (t))
+    *walk_subtrees = 0;
+  else
+    *walk_subtrees = 1;
+
+  if (TREE_CODE (t) == VAR_DECL)
+    {
+      struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
+      hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
+      tree *repl = declmap->get (t);
+      if (repl)
+        *tp = *repl;
+    }
+  return NULL_TREE;
+}
+
+/* Copy leading register-type assignments to local variables in SRC to just
+   before DST, creating temporaries, adjusting the mapping of operands in WI
+   and remapping operands as necessary.  Add any new temporaries to TGT_BIND.
+   Return the first statement that does not conform to
+   grid_reg_assignment_to_local_var_p or NULL.  */
+
+static gimple *
+grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
+                                     gbind *tgt_bind, struct walk_stmt_info *wi)
+{
+  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+      if (gbind *bind = dyn_cast <gbind *> (stmt))
+        {
+          gimple *r = grid_copy_leading_local_assignments
+            (gimple_bind_body (bind), dst, tgt_bind, wi);
+          if (r)
+            return r;
+          else
+            continue;
+        }
+      if (!grid_reg_assignment_to_local_var_p (stmt))
+        return stmt;
+      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
+      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
+                                 TREE_TYPE (lhs));
+      DECL_CONTEXT (repl) = current_function_decl;
+      gimple_bind_append_vars (tgt_bind, repl);
+
+      declmap->put (lhs, repl);
+      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
+      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
+      gsi_insert_before (dst, copy, GSI_SAME_STMT);
+    }
+  return NULL;
+}
+
+/* Given a freshly copied top level kernel SEQ, identify the individual OMP
+   components, mark them as parts of the kernel, copy the assignments leading
+   up to them just before DST (remapping their operands using WI and adding
+   any new temporaries to TGT_BIND), and return the inner loop.  */
+
+static gomp_for *
+grid_process_kernel_body_copy (gimple_seq seq, gimple_stmt_iterator *dst,
+                               gbind *tgt_bind, struct walk_stmt_info *wi)
+{
+  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, wi);
+  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
+  gcc_assert (teams);
+  gimple_omp_teams_set_grid_phony (teams, true);
+  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
+                                              tgt_bind, wi);
+  gcc_checking_assert (stmt);
+  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
+  gcc_assert (dist);
+  gimple_seq prebody = gimple_omp_for_pre_body (dist);
+  if (prebody)
+    grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
+  gimple_omp_for_set_grid_phony (dist, true);
+  stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
+                                              tgt_bind, wi);
+  gcc_checking_assert (stmt);
+
+  gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
+  gimple_omp_parallel_set_grid_phony (parallel, true);
+  stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), dst,
+                                              tgt_bind, wi);
+  gomp_for *inner_loop = as_a <gomp_for *> (stmt);
+  gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
+  prebody = gimple_omp_for_pre_body (inner_loop);
+  if (prebody)
+    grid_copy_leading_local_assignments (prebody, dst, tgt_bind, wi);
+
+  return inner_loop;
+}
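As an illustration (an invented fragment, not from the patch) of why the leading assignments must be duplicated, consider:

  int n = *count_ptr;  /* register-type assignment to a local variable */
#pragma omp target teams
#pragma omp distribute parallel for
  for (int i = 0; i < n; i++)
    /* ... */;

The grid size depends on n, which is only computed inside the target body. grid_copy_leading_local_assignments therefore re-creates such assignments in front of the target statement using fresh temporaries, records the original-to-temporary mapping in the declmap hash map, and grid_remap_prebody_decls later rewrites uses (such as the loop bound) to refer to the temporaries.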
+
+/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
+   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
+   is the bind into which temporaries inserted before TARGET should be
+   added.  */
+
+static void
+grid_attempt_target_gridification (gomp_target *target,
+                                   gimple_stmt_iterator *gsi,
+                                   gbind *tgt_bind)
+{
+  tree group_size;
+  if (!target || !grid_target_follows_gridifiable_pattern (target, &group_size))
+    return;
+
+  location_t loc = gimple_location (target);
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
+                     "Target construct will be turned into a gridified GPGPU "
+                     "kernel\n");
+
+  /* Copy the target body to a GPUKERNEL construct:  */
+  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
+    (gimple_omp_body (target));
+
+  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
+  struct walk_stmt_info wi;
+  memset (&wi, 0, sizeof (struct walk_stmt_info));
+  wi.info = declmap;
+
+  /* Copy assignments in between OMP statements before target, mark OMP
+     statements within the copy appropriately.  */
+  gomp_for *inner_loop = grid_process_kernel_body_copy (kernel_seq, gsi,
+                                                        tgt_bind, &wi);
+
+  gbind *old_bind = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
+  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
+  tree new_block = gimple_bind_block (new_bind);
+  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
+  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
+  BLOCK_SUBBLOCKS (enc_block) = new_block;
+  BLOCK_SUPERCONTEXT (new_block) = enc_block;
+  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
+  gimple_seq_add_stmt
+    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
+     gpukernel);
+
+  walk_tree (&group_size, grid_remap_prebody_decls, &wi, NULL);
+  push_gimplify_context ();
+  size_t collapse = gimple_omp_for_collapse (inner_loop);
+  for (size_t i = 0; i < collapse; i++)
+    {
+      tree itype, type = TREE_TYPE (gimple_omp_for_index (inner_loop, i));
+      if (POINTER_TYPE_P (type))
+        itype = signed_type_for (type);
+      else
+        itype = type;
+
+      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
+      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
+      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
+      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
+      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
+      adjust_for_condition (loc, &cond_code, &n2);
+      tree step;
+      step = get_omp_for_step_from_incr (loc,
+                                         gimple_omp_for_incr (inner_loop, i));
+      gimple_seq tmpseq = NULL;
+      n1 = fold_convert (itype, n1);
+      n2 = fold_convert (itype, n2);
+      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
+      t = fold_build2 (PLUS_EXPR, itype, step, t);
+      t = fold_build2 (PLUS_EXPR, itype, t, n2);
+      t = fold_build2 (MINUS_EXPR, itype, t, n1);
+      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
+        t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                         fold_build1 (NEGATE_EXPR, itype, t),
+                         fold_build1 (NEGATE_EXPR, itype, step));
+      else
+        t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+      tree gs = fold_convert (uint32_type_node, t);
+      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
+      if (!gimple_seq_empty_p (tmpseq))
+        gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
+
+      tree ws;
+      if (i == 0 && group_size)
+        {
+          ws = fold_convert (uint32_type_node, group_size);
+          tmpseq = NULL;
+          gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
+          if (!gimple_seq_empty_p (tmpseq))
+            gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
+        }
+      else
+        ws = build_zero_cst (uint32_type_node);
+
+      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
+      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
+      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
+      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
+      gimple_omp_target_set_clauses (target, c);
+    }
+  pop_gimplify_context (tgt_bind);
+  delete declmap;
+  return;
+}
+
+/* Walker function doing all the work for grid_gridify_all_targets.  */
+
+static tree
+grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
+                               bool *handled_ops_p,
+                               struct walk_stmt_info *incoming)
+{
+  *handled_ops_p = false;
+
+  gimple *stmt = gsi_stmt (*gsi);
+  gomp_target *target = dyn_cast <gomp_target *> (stmt);
+  if (target)
+    {
+      gbind *tgt_bind = (gbind *) incoming->info;
+      gcc_checking_assert (tgt_bind);
+      grid_attempt_target_gridification (target, gsi, tgt_bind);
+      return NULL_TREE;
+    }
+  gbind *bind = dyn_cast <gbind *> (stmt);
+  if (bind)
+    {
+      *handled_ops_p = true;
+      struct walk_stmt_info wi;
+      memset (&wi, 0, sizeof (wi));
+      wi.info = bind;
+      walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
+                           grid_gridify_all_targets_stmt, NULL, &wi);
+    }
+  return NULL_TREE;
+}
+
+/* Attempt to gridify all target constructs in BODY_P.  All such targets will
+   have their bodies duplicated, with the new copy being put into a
+   gimple_omp_grid_body statement.  All kernel-related constructs within the
+   grid_body will be marked with phony flags or kernel kinds.  Moreover, some
+   re-structuring is often needed, such as copying pre-bodies before the
+   target construct so that kernel grid sizes can be computed.  */
+
+static void
+grid_gridify_all_targets (gimple_seq *body_p)
+{
+  struct walk_stmt_info wi;
+  memset (&wi, 0, sizeof (wi));
+  walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
+}
+
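For every collapsed dimension, the loop above folds the iteration count of the normalized inner loop into the _griddIM_ SIZE operand; the expression (step + (cond == LT ? -1 : 1) + n2 - n1) / step is a ceiling division. A standalone sketch of the same arithmetic (illustrative only; the name grid_size_1d is invented, signed non-pointer case):

/* Iteration count of "for (i = n1; i < n2; i += step)", computed the same
   way as the gridification loop above, i.e. ceil ((n2 - n1) / step).  */
static unsigned
grid_size_1d (int n1, int n2, int step)
{
  return (unsigned) ((step - 1 + n2 - n1) / step);
}

/* E.g. n1 = 0, n2 = 10, step = 4 gives (4 - 1 + 10 - 0) / 4 = 3, matching
   the three iterations i = 0, 4, 8.  */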
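Schematically (a hand-drawn illustration, not actual compiler output), a gridified target ends up carrying both the original construct nest, which remains usable for host fallback, and its flattened copy for the accelerator:

/* GIMPLE_OMP_TARGET  clauses: ..., _griddim_(0:gs,ws)
     GIMPLE_OMP_TEAMS                            <- original body
       GIMPLE_OMP_FOR (kind DISTRIBUTE)
         GIMPLE_OMP_PARALLEL
           GIMPLE_OMP_FOR (kind FOR)
     GIMPLE_OMP_GRID_BODY                        <- appended copy
       GIMPLE_OMP_TEAMS (grid_phony)
         GIMPLE_OMP_FOR (kind DISTRIBUTE, grid_phony)
           GIMPLE_OMP_PARALLEL (grid_phony)
             GIMPLE_OMP_FOR (kind GRID_LOOP)  */

The phony flags keep later OMP lowering from creating separate child functions for the copied constructs, while the GRID_LOOP kind lets the HSAIL generator map the innermost loop directly onto the HSA grid.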
 /* Main entry point.  */
 
 static unsigned int
@@ -16508,6 +17769,10 @@ execute_lower_omp (void)
 			 delete_omp_context);
 
   body = gimple_body (current_function_decl);
+
+  if (hsa_gen_requested_p ())
+    grid_gridify_all_targets (&body);
+
   scan_omp (&body, NULL);
   gcc_assert (taskreg_nesting_level == 0);
   FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx)
@@ -16845,6 +18110,7 @@ make_gimple_omp_edges (basic_block bb, struct omp_region **region,
     case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_CRITICAL:
     case GIMPLE_OMP_SECTION:
+    case GIMPLE_OMP_GRID_BODY:
       cur_region = new_omp_region (bb, code, cur_region);
       fallthru = true;
       break;
@@ -1916,8 +1916,35 @@ common_handle_option (struct gcc_options *opts,
       break;
 
     case OPT_foffload_:
-      /* Deferred.  */
-      break;
+      {
+	const char *p = arg;
+	opts->x_flag_disable_hsa = true;
+	while (*p != 0)
+	  {
+	    const char *comma = strchr (p, ',');
+
+	    if ((strncmp (p, "disable", 7) == 0)
+		&& (p[7] == ',' || p[7] == '\0'))
+	      {
+		opts->x_flag_disable_hsa = true;
+		break;
+	      }
+
+	    if ((strncmp (p, "hsa", 3) == 0)
+		&& (p[3] == ',' || p[3] == '\0'))
+	      {
+#ifdef ENABLE_HSA
+		opts->x_flag_disable_hsa = false;
+#else
+		sorry ("HSA has not been enabled during configuration");
+#endif
+	      }
+	    if (!comma)
+	      break;
+	    p = comma + 1;
+	  }
+	break;
+      }
 
 #ifndef ACCEL_COMPILER
     case OPT_foffload_abi_:
diff --git a/gcc/params.def b/gcc/params.def
index 308844a..88971c7 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1183,6 +1183,11 @@ DEFPARAM (PARAM_MAX_RTL_IF_CONVERSION_INSNS,
 	  "Maximum number of insns in a basic block to consider for RTL "
 	  "if-conversion.",
 	  10, 0, 99)
+
+DEFPARAM (PARAM_HSA_GEN_DEBUG_STORES,
+	  "hsa-gen-debug-stores",
+	  "Level of hsa debug stores verbosity.",
+	  0, 0, 1)
 
 /*
 Local variables:
diff --git a/gcc/passes.def b/gcc/passes.def
index ab6e083..a6dae76 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -151,6 +151,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_ipa_cp);
   NEXT_PASS (pass_ipa_cdtor_merge);
   NEXT_PASS (pass_target_clone);
+  NEXT_PASS (pass_ipa_hsa);
   NEXT_PASS (pass_ipa_inline);
   NEXT_PASS (pass_ipa_pure_const);
   NEXT_PASS (pass_ipa_reference);
@@ -386,6 +387,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_nrv);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
+  NEXT_PASS (pass_gen_hsail);
   NEXT_PASS (pass_expand);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 2765179..d9a5066 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -97,6 +97,7 @@ DEFTIMEVAR (TV_WHOPR_WPA_IO          , "whopr wpa I/O")
 DEFTIMEVAR (TV_WHOPR_PARTITIONING    , "whopr partitioning")
 DEFTIMEVAR (TV_WHOPR_LTRANS          , "whopr ltrans")
 DEFTIMEVAR (TV_IPA_REFERENCE         , "ipa reference")
+DEFTIMEVAR (TV_IPA_HSA               , "ipa HSA")
 DEFTIMEVAR (TV_IPA_PROFILE           , "ipa profile")
 DEFTIMEVAR (TV_IPA_AUTOFDO           , "auto profile")
 DEFTIMEVAR (TV_IPA_PURE_CONST        , "ipa pure const")
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 8bab3e5..b754e5b 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -75,6 +75,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gcse.h"
 #include "tree-chkp.h"
 #include "omp-low.h"
+#include "hsa.h"
 
 #if defined(DBX_DEBUGGING_INFO) || defined(XCOFF_DEBUGGING_INFO)
 #include "dbxout.h"
@@ -518,6 +519,8 @@ compile_file (void)
 
       omp_finish_file ();
 
+      hsa_output_brig ();
+
       output_shared_constant_pool ();
       output_object_blocks ();
       finish_tm_clone_pairs ();
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index f78cc7b..41c1a9b 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -458,7 +458,11 @@ enum omp_clause_code {
   OMP_CLAUSE_VECTOR_LENGTH,
 
   /* OpenACC clause: tile ( size-expr-list ).  */
-  OMP_CLAUSE_TILE
+  OMP_CLAUSE_TILE,
+
+  /* OpenMP internal-only clause to specify grid dimensions of a gridified
+     kernel.  */
+  OMP_CLAUSE__GRIDDIM_
 };
 
 #undef DEFTREESTRUCT
@@ -1375,6 +1379,9 @@ struct GTY(()) tree_omp_clause {
     enum tree_code reduction_code;
     enum omp_clause_linear_kind linear_kind;
     enum tree_code if_modifier;
+    /* The dimension an OMP_CLAUSE__GRIDDIM_ clause of a gridified target
+       construct describes.
*/ + unsigned int dimension; } GTY ((skip)) subcode; /* The gimplification of OMP_CLAUSE_REDUCTION_{INIT,MERGE} for omp-low's diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index e8e8e48..b942a01 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -471,6 +471,7 @@ extern gimple_opt_pass *make_pass_sanopt (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_oacc (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_oacc_kernels (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_gen_hsail (gcc::context *ctxt); /* IPA Passes */ extern simple_ipa_opt_pass *make_pass_ipa_lower_emutls (gcc::context *ctxt); @@ -495,6 +496,7 @@ extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); +extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index 4488973..9c13d84 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -942,6 +942,18 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, int flags) pp_right_paren (pp); break; + case OMP_CLAUSE__GRIDDIM_: + pp_string (pp, "_griddim_("); + pp_unsigned_wide_integer (pp, OMP_CLAUSE__GRIDDIM__DIMENSION (clause)); + pp_colon (pp); + dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__SIZE (clause), spc, flags, + false); + pp_comma (pp); + dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__GROUP (clause), spc, flags, + false); + pp_right_paren (pp); + break; + default: /* Should never happen. */ dump_generic_node (pp, clause, spc, flags, false); @@ -328,6 +328,7 @@ unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_NUM_WORKERS */ 1, /* OMP_CLAUSE_VECTOR_LENGTH */ 1, /* OMP_CLAUSE_TILE */ + 2, /* OMP_CLAUSE__GRIDDIM_ */ }; const char * const omp_clause_code_name[] = @@ -398,7 +399,8 @@ const char * const omp_clause_code_name[] = "num_gangs", "num_workers", "vector_length", - "tile" + "tile", + "_griddim_" }; @@ -11744,6 +11746,7 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, switch (OMP_CLAUSE_CODE (*tp)) { case OMP_CLAUSE_GANG: + case OMP_CLAUSE__GRIDDIM_: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); /* FALLTHRU */ @@ -1636,6 +1636,14 @@ extern void protected_set_expr_location (tree, location_t); #define OMP_CLAUSE_TILE_LIST(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0) +#define OMP_CLAUSE__GRIDDIM__DIMENSION(NODE) \ + (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_)\ + ->omp_clause.subcode.dimension) +#define OMP_CLAUSE__GRIDDIM__SIZE(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 0) +#define OMP_CLAUSE__GRIDDIM__GROUP(NODE) \ + OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 1) + /* SSA_NAME accessors. */ /* Returns the IDENTIFIER_NODE giving the SSA name a name or NULL_TREE diff --git a/include/ChangeLog b/include/ChangeLog index adab9ee..51736f7 100644 --- a/include/ChangeLog +++ b/include/ChangeLog @@ -1,3 +1,16 @@ +2016-01-19 Martin Jambor <mjambor@suse.cz> + + * gomp-constants.h (GOMP_DEVICE_HSA): New macro. + (GOMP_VERSION_HSA): Likewise. 
+	(GOMP_TARGET_ARG_DEVICE_MASK): Likewise.
+	(GOMP_TARGET_ARG_DEVICE_ALL): Likewise.
+	(GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise.
+	(GOMP_TARGET_ARG_ID_MASK): Likewise.
+	(GOMP_TARGET_ARG_NUM_TEAMS): Likewise.
+	(GOMP_TARGET_ARG_THREAD_LIMIT): Likewise.
+	(GOMP_TARGET_ARG_VALUE_SHIFT): Likewise.
+	(GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise.
+
 2016-01-07  Mike Frysinger  <vapier@gentoo.org>
 
 	* longlong.h: Change !__SHMEDIA__ to
diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index dffd631..a8e7723 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -176,6 +176,7 @@ enum gomp_map_kind
 #define GOMP_DEVICE_NOT_HOST		4
 #define GOMP_DEVICE_NVIDIA_PTX		5
 #define GOMP_DEVICE_INTEL_MIC		6
+#define GOMP_DEVICE_HSA			7
 
 #define GOMP_DEVICE_ICV			-1
 #define GOMP_DEVICE_HOST_FALLBACK	-2
@@ -201,6 +202,7 @@ enum gomp_map_kind
 #define GOMP_VERSION	0
 #define GOMP_VERSION_NVIDIA_PTX 1
 #define GOMP_VERSION_INTEL_MIC 0
+#define GOMP_VERSION_HSA 0
 
 #define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV))
 #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff)
@@ -228,4 +230,30 @@ enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff)
 #define GOMP_LAUNCH_OP_MAX 0xffff
 
+/* Bitmask to apply in order to find out the intended device of a target
+   argument.  */
+#define GOMP_TARGET_ARG_DEVICE_MASK		((1 << 7) - 1)
+/* The target argument is significant for all devices.  */
+#define GOMP_TARGET_ARG_DEVICE_ALL		0
+
+/* Flag set when the actual value is stored in the subsequent element of the
+   device-specific argument values array.  */
+#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM	(1 << 7)
+
+/* Bitmask to apply to a target argument to find out the value identifier.  */
+#define GOMP_TARGET_ARG_ID_MASK			(((1 << 8) - 1) << 8)
+/* Target argument index of NUM_TEAMS.  */
+#define GOMP_TARGET_ARG_NUM_TEAMS		(1 << 8)
+/* Target argument index of THREAD_LIMIT.  */
+#define GOMP_TARGET_ARG_THREAD_LIMIT		(2 << 8)
+
+/* If the value is directly embedded in the target argument, it must be at
+   most 16 bits wide and is shifted by this many bits.  */
+#define GOMP_TARGET_ARG_VALUE_SHIFT		16
+
+/* HSA specific data structures.  */
+
+/* Identifiers of device-specific target arguments.  */
+#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES	(1 << 8)
+
 #endif
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 2efc516..82619e6 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,64 @@
+2016-01-19  Martin Jambor  <mjambor@suse.cz>
+	    Martin Liska  <mliska@suse.cz>
+
+	* plugin/Makefrag.am: Add HSA plugin requirements.
+	* plugin/configfrag.ac (HSA_RUNTIME_INCLUDE): New variable.
+	(HSA_RUNTIME_LIB): Likewise.
+	(HSA_RUNTIME_CPPFLAGS): Likewise.
+	(HSA_RUNTIME_INCLUDE): New substitution.
+	(HSA_RUNTIME_LIB): Likewise.
+	(HSA_RUNTIME_LDFLAGS): Likewise.
+	(hsa-runtime): New configure option.
+	(hsa-runtime-include): Likewise.
+	(hsa-runtime-lib): Likewise.
+	(PLUGIN_HSA): New substitution variable.
+	Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new
+	configure options.
+	(PLUGIN_HSA_CPPFLAGS): Likewise.
+	(PLUGIN_HSA_LDFLAGS): Likewise.
+	(PLUGIN_HSA_LIBS): Likewise.
+	Check that we have access to HSA run-time.
+	* libgomp-plugin.h (offload_target_type): New element
+	OFFLOAD_TARGET_TYPE_HSA.
+	* libgomp.h (gomp_target_task): New fields firstprivate_copies and
+	args.
+	(bool gomp_create_target_task): Updated.
+	(gomp_device_descr): Extra parameter of run_func and async_run_func,
+	new field can_run_func.
+	* libgomp_g.h (GOMP_target_ext): Update prototype.
+ * oacc-host.c (host_run): Added a new parameter args. + * target.c (calculate_firstprivate_requirements): New function. + (copy_firstprivate_data): Likewise. + (gomp_target_fallback_firstprivate): Use them. + (gomp_target_unshare_firstprivate): New function. + (gomp_get_target_fn_addr): Allow returning NULL for shared memory + devices. + (GOMP_target): Do host fallback for all shared memory devices. Do not + pass any args to plugins. + (GOMP_target_ext): Introduce device-specific argument parameter args. + Allow host fallback if device shares memory. Do not remap data if + device has shared memory. + (gomp_target_task_fn): Likewise. Also treat shared memory devices + like host fallback for mappings. + (GOMP_target_data): Treat shared memory devices like host fallback. + (GOMP_target_data_ext): Likewise. + (GOMP_target_update): Likewise. + (GOMP_target_update_ext): Likewise. Also pass NULL as args to + gomp_create_target_task. + (GOMP_target_enter_exit_data): Likewise. + (omp_target_alloc): Treat shared memory devices like host fallback. + (omp_target_free): Likewise. + (omp_target_is_present): Likewise. + (omp_target_memcpy): Likewise. + (omp_target_memcpy_rect): Likewise. + (omp_target_associate_ptr): Likewise. + (gomp_load_plugin_for_device): Also load can_run. + * task.c (GOMP_PLUGIN_target_task_completion): Free + firstprivate_copies. + (gomp_create_target_task): Accept new argument args and store it to + ttask. + * plugin/plugin-hsa.c: New file. + 2016-01-18 Tom de Vries <tom@codesourcery.com> * testsuite/libgomp.oacc-c-c++-common/kernels-loop-2.c: New test. diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 7a1c976..bbfac4e 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -17,7 +17,7 @@ # Plugins for offload execution, Makefile.am fragment. # -# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# Copyright (C) 2014-2016 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # @@ -89,7 +89,8 @@ DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \ $(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \ $(srcdir)/libgomp.spec.in $(srcdir)/../depcomp @PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la -@USE_FORTRAN_TRUE@am__append_2 = openacc.f90 +@PLUGIN_HSA_TRUE@am__append_2 = libgomp-plugin-hsa.la +@USE_FORTRAN_TRUE@am__append_3 = openacc.f90 subdir = . 
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ @@ -147,6 +148,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ "$(DESTDIR)$(toolexeclibdir)" LTLIBRARIES = $(toolexeclib_LTLIBRARIES) am__DEPENDENCIES_1 = +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_DEPENDENCIES = libgomp.la \ +@PLUGIN_HSA_TRUE@ $(am__DEPENDENCIES_1) +@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_OBJECTS = \ +@PLUGIN_HSA_TRUE@ libgomp_plugin_hsa_la-plugin-hsa.lo +libgomp_plugin_hsa_la_OBJECTS = $(am_libgomp_plugin_hsa_la_OBJECTS) +libgomp_plugin_hsa_la_LINK = $(LIBTOOL) --tag=CC \ + $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ + --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libgomp_plugin_hsa_la_LDFLAGS) $(LDFLAGS) -o $@ +@PLUGIN_HSA_TRUE@am_libgomp_plugin_hsa_la_rpath = -rpath \ +@PLUGIN_HSA_TRUE@ $(toolexeclibdir) @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_DEPENDENCIES = libgomp.la \ @PLUGIN_NVPTX_TRUE@ $(am__DEPENDENCIES_1) @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_OBJECTS = \ @@ -187,7 +199,8 @@ FCLD = $(FC) FCLINK = $(LIBTOOL) --tag=FC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ --mode=link $(FCLD) $(AM_FCFLAGS) $(FCFLAGS) $(AM_LDFLAGS) \ $(LDFLAGS) -o $@ -SOURCES = $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES) +SOURCES = $(libgomp_plugin_hsa_la_SOURCES) \ + $(libgomp_plugin_nvptx_la_SOURCES) $(libgomp_la_SOURCES) MULTISRCTOP = MULTIBUILDTOP = MULTIDIRS = @@ -255,6 +268,8 @@ FC = @FC@ FCFLAGS = @FCFLAGS@ FGREP = @FGREP@ GREP = @GREP@ +HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@ +HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -299,6 +314,10 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PLUGIN_HSA = @PLUGIN_HSA@ +PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@ +PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@ +PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@ PLUGIN_NVPTX = @PLUGIN_NVPTX@ PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@ PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@ @@ -391,7 +410,7 @@ libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include AM_CPPFLAGS = $(addprefix -I, $(search_path)) AM_CFLAGS = $(XCFLAGS) AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS) -toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) +toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) $(am__append_2) nodist_toolexeclib_HEADERS = libgomp.spec # -Wc is only a libtool option. @@ -415,7 +434,7 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ bar.c ptrlock.c time.c fortran.c affinity.c target.c \ splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ - priority_queue.c $(am__append_2) + priority_queue.c $(am__append_3) # Nvidia PTX OpenACC plugin. 
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) @@ -426,6 +445,16 @@ libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ @PLUGIN_NVPTX_TRUE@ $(lt_host_flags) $(PLUGIN_NVPTX_LDFLAGS) @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS) @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static + +# Heterogenous Systems Architecture plugin +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION) +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS) +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LDFLAGS = \ +@PLUGIN_HSA_TRUE@ $(libgomp_plugin_hsa_version_info) \ +@PLUGIN_HSA_TRUE@ $(lt_host_flags) $(PLUGIN_HSA_LDFLAGS) +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS) +@PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static nodist_noinst_HEADERS = libgomp_f.h nodist_libsubinclude_HEADERS = omp.h openacc.h @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ @@ -553,6 +582,8 @@ clean-toolexeclibLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done +libgomp-plugin-hsa.la: $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_hsa_la_DEPENDENCIES) + $(libgomp_plugin_hsa_la_LINK) $(am_libgomp_plugin_hsa_la_rpath) $(libgomp_plugin_hsa_la_OBJECTS) $(libgomp_plugin_hsa_la_LIBADD) $(LIBS) libgomp-plugin-nvptx.la: $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_DEPENDENCIES) $(EXTRA_libgomp_plugin_nvptx_la_DEPENDENCIES) $(libgomp_plugin_nvptx_la_LINK) $(am_libgomp_plugin_nvptx_la_rpath) $(libgomp_plugin_nvptx_la_OBJECTS) $(libgomp_plugin_nvptx_la_LIBADD) $(LIBS) libgomp.la: $(libgomp_la_OBJECTS) $(libgomp_la_DEPENDENCIES) $(EXTRA_libgomp_la_DEPENDENCIES) @@ -575,6 +606,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iter_ull.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp-plugin.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lock.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/loop.Plo@am__quote@ @@ -623,6 +655,13 @@ distclean-compile: @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< +libgomp_plugin_hsa_la-plugin-hsa.lo: plugin/plugin-hsa.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_hsa_la-plugin-hsa.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Tpo $(DEPDIR)/libgomp_plugin_hsa_la-plugin-hsa.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='plugin/plugin-hsa.c' object='libgomp_plugin_hsa_la-plugin-hsa.lo' libtool=yes @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_hsa_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_hsa_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o libgomp_plugin_hsa_la-plugin-hsa.lo `test -f 'plugin/plugin-hsa.c' || echo '$(srcdir)/'`plugin/plugin-hsa.c + libgomp_plugin_nvptx_la-plugin-nvptx.lo: plugin/plugin-nvptx.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(libgomp_plugin_nvptx_la_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(libgomp_plugin_nvptx_la_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT libgomp_plugin_nvptx_la-plugin-nvptx.lo -MD -MP -MF $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo -c -o libgomp_plugin_nvptx_la-plugin-nvptx.lo `test -f 'plugin/plugin-nvptx.c' || echo '$(srcdir)/'`plugin/plugin-nvptx.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Tpo $(DEPDIR)/libgomp_plugin_nvptx_la-plugin-nvptx.Plo diff --git a/libgomp/config.h.in b/libgomp/config.h.in index 2e4c698..226ac53 100644 --- a/libgomp/config.h.in +++ b/libgomp/config.h.in @@ -60,6 +60,9 @@ /* Define to 1 if you have the `strtoull' function. */ #undef HAVE_STRTOULL +/* Define to 1 if the system has the type `struct _Mutex_Control'. */ +#undef HAVE_STRUCT__MUTEX_CONTROL + /* Define to 1 if the target runtime linker supports binding the same symbol to different versions. */ #undef HAVE_SYMVER_SYMBOL_RENAMING_RUNTIME_SUPPORT @@ -119,6 +122,9 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION +/* Define to 1 if the HSA plugin is built, 0 if not. */ +#undef PLUGIN_HSA + /* Define to 1 if the NVIDIA plugin is built, 0 if not. */ #undef PLUGIN_NVPTX diff --git a/libgomp/configure b/libgomp/configure index aaa17c9..1410bc7 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -627,10 +627,18 @@ LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE OPT_LDFLAGS SECTION_LDFLAGS +PLUGIN_HSA_FALSE +PLUGIN_HSA_TRUE PLUGIN_NVPTX_FALSE PLUGIN_NVPTX_TRUE offload_additional_lib_paths offload_additional_options +PLUGIN_HSA_LIBS +PLUGIN_HSA_LDFLAGS +PLUGIN_HSA_CPPFLAGS +PLUGIN_HSA +HSA_RUNTIME_LIB +HSA_RUNTIME_INCLUDE PLUGIN_NVPTX_LIBS PLUGIN_NVPTX_LDFLAGS PLUGIN_NVPTX_CPPFLAGS @@ -782,6 +790,10 @@ enable_maintainer_mode with_cuda_driver with_cuda_driver_include with_cuda_driver_lib +with_hsa_runtime +with_hsa_runtime_include +with_hsa_runtime_lib +with_hsa_kmt_lib enable_linux_futex enable_tls enable_symvers @@ -1453,6 +1465,17 @@ Optional Packages: --with-cuda-driver-lib=PATH specify directory for the installed CUDA driver library + --with-hsa-runtime=PATH specify prefix directory for installed HSA run-time + package. Equivalent to + --with-hsa-runtime-include=PATH/include plus + --with-hsa-runtime-lib=PATH/lib + --with-hsa-runtime-include=PATH + specify directory for installed HSA run-time include + files + --with-hsa-runtime-lib=PATH + specify directory for the installed HSA run-time + library + --with-hsa-kmt-lib=PATH specify directory for installed HSA KMT library. 
Some influential environment variables: CC C compiler command @@ -11121,7 +11144,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11124 "configure" +#line 11147 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11227,7 +11250,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11230 "configure" +#line 11253 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -15090,7 +15113,7 @@ esac # Plugins for offload execution, configure.ac fragment. -*- mode: autoconf -*- # -# Copyright (C) 2014-2015 Free Software Foundation, Inc. +# Copyright (C) 2014-2016 Free Software Foundation, Inc. # # Contributed by Mentor Embedded. # @@ -15225,6 +15248,72 @@ PLUGIN_NVPTX_LIBS= +# Look for HSA run-time, its includes and libraries + +HSA_RUNTIME_INCLUDE= +HSA_RUNTIME_LIB= + + +HSA_RUNTIME_CPPFLAGS= +HSA_RUNTIME_LDFLAGS= + + +# Check whether --with-hsa-runtime was given. +if test "${with_hsa_runtime+set}" = set; then : + withval=$with_hsa_runtime; +fi + + +# Check whether --with-hsa-runtime-include was given. +if test "${with_hsa_runtime_include+set}" = set; then : + withval=$with_hsa_runtime_include; +fi + + +# Check whether --with-hsa-runtime-lib was given. +if test "${with_hsa_runtime_lib+set}" = set; then : + withval=$with_hsa_runtime_lib; +fi + +if test "x$with_hsa_runtime" != x; then + HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include + HSA_RUNTIME_LIB=$with_hsa_runtime/lib +fi +if test "x$with_hsa_runtime_include" != x; then + HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include +fi +if test "x$with_hsa_runtime_lib" != x; then + HSA_RUNTIME_LIB=$with_hsa_runtime_lib +fi +if test "x$HSA_RUNTIME_INCLUDE" != x; then + HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE +fi +if test "x$HSA_RUNTIME_LIB" != x; then + HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB +fi + + +# Check whether --with-hsa-kmt-lib was given. +if test "${with_hsa_kmt_lib+set}" = set; then : + withval=$with_hsa_kmt_lib; +fi + +if test "x$with_hsa_kmt_lib" != x; then + HSA_RUNTIME_LDFLAGS="$HSA_RUNTIME_LDFLAGS -L$with_hsa_kmt_lib" + HSA_RUNTIME_LIB= +fi + +PLUGIN_HSA=0 +PLUGIN_HSA_CPPFLAGS= +PLUGIN_HSA_LDFLAGS= +PLUGIN_HSA_LIBS= + + + + + + + # Get offload targets and path to install tree of offloading compiler. offload_additional_options= offload_additional_lib_paths= @@ -15277,6 +15366,60 @@ rm -f core conftest.err conftest.$ac_objext \ ;; esac ;; + hsa*) + case "${target}" in + x86_64-*-*) + case " ${CC} ${CFLAGS} " in + *" -m32 "*) + PLUGIN_HSA=0 + ;; + *) + tgt_name=hsa + PLUGIN_HSA=$tgt + PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS + PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS + PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt" + + PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS" + PLUGIN_HSA_save_LDFLAGS=$LDFLAGS + LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS" + PLUGIN_HSA_save_LIBS=$LIBS + LIBS="$PLUGIN_HSA_LIBS $LIBS" + + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include "hsa.h" +int +main () +{ +hsa_status_t status = hsa_init () + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + PLUGIN_HSA=1 +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS + LDFLAGS=$PLUGIN_HSA_save_LDFLAGS + LIBS=$PLUGIN_HSA_save_LIBS + case $PLUGIN_HSA in + hsa*) + HSA_PLUGIN=0 + as_fn_error "HSA run-time package required for HSA support" "$LINENO" 5 + ;; + esac + ;; + esac + ;; + *-*-*) + PLUGIN_HSA=0 + ;; + esac + ;; *) as_fn_error "unknown offload target specified" "$LINENO" 5 ;; @@ -15313,6 +15456,19 @@ cat >>confdefs.h <<_ACEOF #define PLUGIN_NVPTX $PLUGIN_NVPTX _ACEOF + if test $PLUGIN_HSA = 1; then + PLUGIN_HSA_TRUE= + PLUGIN_HSA_FALSE='#' +else + PLUGIN_HSA_TRUE='#' + PLUGIN_HSA_FALSE= +fi + + +cat >>confdefs.h <<_ACEOF +#define PLUGIN_HSA $PLUGIN_HSA +_ACEOF + # Check for functions needed. @@ -16712,6 +16868,10 @@ if test -z "${PLUGIN_NVPTX_TRUE}" && test -z "${PLUGIN_NVPTX_FALSE}"; then as_fn_error "conditional \"PLUGIN_NVPTX\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${PLUGIN_HSA_TRUE}" && test -z "${PLUGIN_HSA_FALSE}"; then + as_fn_error "conditional \"PLUGIN_HSA\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_TRUE}" && test -z "${LIBGOMP_BUILD_VERSIONED_SHLIB_FALSE}"; then as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 64035e4..53f9248 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -48,7 +48,8 @@ enum offload_target_type OFFLOAD_TARGET_TYPE_HOST = 2, /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, - OFFLOAD_TARGET_TYPE_INTEL_MIC = 6 + OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, + OFFLOAD_TARGET_TYPE_HSA = 7 }; /* Auxiliary struct, used for transferring pairs of addresses from plugin diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 6ddde56..7108a6d 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -496,6 +496,10 @@ struct gomp_target_task struct target_mem_desc *tgt; struct gomp_task *task; struct gomp_team *team; + /* Copies of firstprivate mapped data for shared memory accelerators. */ + void *firstprivate_copies; + /* Device-specific target arguments. */ + void **args; void *hostaddrs[]; }; @@ -750,7 +754,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **); extern bool gomp_create_target_task (struct gomp_device_descr *, void (*) (void *), size_t, void **, size_t *, unsigned short *, unsigned int, - void **, enum gomp_target_task_state); + void **, void **, + enum gomp_target_task_state); static void inline gomp_finish_task (struct gomp_task *task) @@ -937,8 +942,9 @@ struct gomp_device_descr void *(*dev2host_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t); void *(*dev2dev_func) (int, void *, const void *, size_t); - void (*run_func) (int, void *, void *); - void (*async_run_func) (int, void *, void *, void *); + bool (*can_run_func) (void *); + void (*run_func) (int, void *, void *, void **); + void (*async_run_func) (int, void *, void *, void **, void *); /* Splay tree containing information about mapped memory regions. 
*/ struct splay_tree_s mem_map; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 6229ca0..24eebb6 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *); extern void GOMP_target (int, void (*) (void *), const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, - unsigned short *, unsigned int, void **, - int, int); + unsigned short *, unsigned int, void **, void **); extern void GOMP_target_data (int, const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_data_ext (int, size_t, void **, size_t *, diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 0760e44..1e760f6 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)), } static void -host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars) +host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, + void **args __attribute__((unused))) { void (*fn)(void *) = (void (*)(void *)) fn_ptr; diff --git a/libgomp/plugin/Makefrag.am b/libgomp/plugin/Makefrag.am index 4efe963..035a663 100644 --- a/libgomp/plugin/Makefrag.am +++ b/libgomp/plugin/Makefrag.am @@ -38,3 +38,16 @@ libgomp_plugin_nvptx_la_LDFLAGS += $(PLUGIN_NVPTX_LDFLAGS) libgomp_plugin_nvptx_la_LIBADD = libgomp.la $(PLUGIN_NVPTX_LIBS) libgomp_plugin_nvptx_la_LIBTOOLFLAGS = --tag=disable-static endif + +if PLUGIN_HSA +# Heterogenous Systems Architecture plugin +libgomp_plugin_hsa_version_info = -version-info $(libtool_VERSION) +toolexeclib_LTLIBRARIES += libgomp-plugin-hsa.la +libgomp_plugin_hsa_la_SOURCES = plugin/plugin-hsa.c +libgomp_plugin_hsa_la_CPPFLAGS = $(AM_CPPFLAGS) $(PLUGIN_HSA_CPPFLAGS) +libgomp_plugin_hsa_la_LDFLAGS = $(libgomp_plugin_hsa_version_info) \ + $(lt_host_flags) +libgomp_plugin_hsa_la_LDFLAGS += $(PLUGIN_HSA_LDFLAGS) +libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS) +libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static +endif diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac index 768954a..2a9d9f9 100644 --- a/libgomp/plugin/configfrag.ac +++ b/libgomp/plugin/configfrag.ac @@ -81,6 +81,62 @@ AC_SUBST(PLUGIN_NVPTX_CPPFLAGS) AC_SUBST(PLUGIN_NVPTX_LDFLAGS) AC_SUBST(PLUGIN_NVPTX_LIBS) +# Look for HSA run-time, its includes and libraries + +HSA_RUNTIME_INCLUDE= +HSA_RUNTIME_LIB= +AC_SUBST(HSA_RUNTIME_INCLUDE) +AC_SUBST(HSA_RUNTIME_LIB) +HSA_RUNTIME_CPPFLAGS= +HSA_RUNTIME_LDFLAGS= + +AC_ARG_WITH(hsa-runtime, + [AS_HELP_STRING([--with-hsa-runtime=PATH], + [specify prefix directory for installed HSA run-time package. 
+ Equivalent to --with-hsa-runtime-include=PATH/include + plus --with-hsa-runtime-lib=PATH/lib])]) +AC_ARG_WITH(hsa-runtime-include, + [AS_HELP_STRING([--with-hsa-runtime-include=PATH], + [specify directory for installed HSA run-time include files])]) +AC_ARG_WITH(hsa-runtime-lib, + [AS_HELP_STRING([--with-hsa-runtime-lib=PATH], + [specify directory for the installed HSA run-time library])]) +if test "x$with_hsa_runtime" != x; then + HSA_RUNTIME_INCLUDE=$with_hsa_runtime/include + HSA_RUNTIME_LIB=$with_hsa_runtime/lib +fi +if test "x$with_hsa_runtime_include" != x; then + HSA_RUNTIME_INCLUDE=$with_hsa_runtime_include +fi +if test "x$with_hsa_runtime_lib" != x; then + HSA_RUNTIME_LIB=$with_hsa_runtime_lib +fi +if test "x$HSA_RUNTIME_INCLUDE" != x; then + HSA_RUNTIME_CPPFLAGS=-I$HSA_RUNTIME_INCLUDE +fi +if test "x$HSA_RUNTIME_LIB" != x; then + HSA_RUNTIME_LDFLAGS=-L$HSA_RUNTIME_LIB +fi + +AC_ARG_WITH(hsa-kmt-lib, + [AS_HELP_STRING([--with-hsa-kmt-lib=PATH], + [specify directory for installed HSA KMT library.])]) +if test "x$with_hsa_kmt_lib" != x; then + HSA_RUNTIME_LDFLAGS="$HSA_RUNTIME_LDFLAGS -L$with_hsa_kmt_lib" + HSA_RUNTIME_LIB= +fi + +PLUGIN_HSA=0 +PLUGIN_HSA_CPPFLAGS= +PLUGIN_HSA_LDFLAGS= +PLUGIN_HSA_LIBS= +AC_SUBST(PLUGIN_HSA) +AC_SUBST(PLUGIN_HSA_CPPFLAGS) +AC_SUBST(PLUGIN_HSA_LDFLAGS) +AC_SUBST(PLUGIN_HSA_LIBS) + + + # Get offload targets and path to install tree of offloading compiler. offload_additional_options= offload_additional_lib_paths= @@ -122,6 +178,49 @@ if test x"$enable_offload_targets" != x; then ;; esac ;; + hsa*) + case "${target}" in + x86_64-*-*) + case " ${CC} ${CFLAGS} " in + *" -m32 "*) + PLUGIN_HSA=0 + ;; + *) + tgt_name=hsa + PLUGIN_HSA=$tgt + PLUGIN_HSA_CPPFLAGS=$HSA_RUNTIME_CPPFLAGS + PLUGIN_HSA_LDFLAGS=$HSA_RUNTIME_LDFLAGS + PLUGIN_HSA_LIBS="-lhsa-runtime64 -lhsakmt" + + PLUGIN_HSA_save_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$PLUGIN_HSA_CPPFLAGS $CPPFLAGS" + PLUGIN_HSA_save_LDFLAGS=$LDFLAGS + LDFLAGS="$PLUGIN_HSA_LDFLAGS $LDFLAGS" + PLUGIN_HSA_save_LIBS=$LIBS + LIBS="$PLUGIN_HSA_LIBS $LIBS" + + AC_LINK_IFELSE( + [AC_LANG_PROGRAM( + [#include "hsa.h"], + [hsa_status_t status = hsa_init ()])], + [PLUGIN_HSA=1]) + CPPFLAGS=$PLUGIN_HSA_save_CPPFLAGS + LDFLAGS=$PLUGIN_HSA_save_LDFLAGS + LIBS=$PLUGIN_HSA_save_LIBS + case $PLUGIN_HSA in + hsa*) + HSA_PLUGIN=0 + AC_MSG_ERROR([HSA run-time package required for HSA support]) + ;; + esac + ;; + esac + ;; + *-*-*) + PLUGIN_HSA=0 + ;; + esac + ;; *) AC_MSG_ERROR([unknown offload target specified]) ;; @@ -145,3 +244,6 @@ AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets", AM_CONDITIONAL([PLUGIN_NVPTX], [test $PLUGIN_NVPTX = 1]) AC_DEFINE_UNQUOTED([PLUGIN_NVPTX], [$PLUGIN_NVPTX], [Define to 1 if the NVIDIA plugin is built, 0 if not.]) +AM_CONDITIONAL([PLUGIN_HSA], [test $PLUGIN_HSA = 1]) +AC_DEFINE_UNQUOTED([PLUGIN_HSA], [$PLUGIN_HSA], + [Define to 1 if the HSA plugin is built, 0 if not.]) diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c new file mode 100644 index 0000000..d888493 --- /dev/null +++ b/libgomp/plugin/plugin-hsa.c @@ -0,0 +1,1493 @@ +/* Plugin for HSAIL execution. + + Copyright (C) 2013-2016 Free Software Foundation, Inc. + + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). 
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <hsa.h>
+#include <hsa_ext_finalize.h>
+#include <dlfcn.h>
+#include "libgomp-plugin.h"
+#include "gomp-constants.h"
+
+/* Keep the following GOMP prefixed structures in sync with respective parts
+   of the compiler.  */
+
+/* Structure describing the run-time and grid properties of an HSA kernel
+   launch.  */
+
+struct GOMP_kernel_launch_attributes
+{
+  /* Number of dimensions the workload has.  Maximum number is 3.  */
+  uint32_t ndim;
+  /* Size of the grid in the three respective dimensions.  */
+  uint32_t gdims[3];
+  /* Size of work-groups in the respective dimensions.  */
+  uint32_t wdims[3];
+};
+
+/* Collection of information needed for a dispatch of a kernel from a
+   kernel.  */
+
+struct GOMP_hsa_kernel_dispatch
+{
+  /* Pointer to a command queue associated with a kernel dispatch agent.  */
+  void *queue;
+  /* Pointer to reserved memory for OMP data struct copying.  */
+  void *omp_data_memory;
+  /* Pointer to a memory space used for kernel arguments passing.  */
+  void *kernarg_address;
+  /* Kernel object.  */
+  uint64_t object;
+  /* Synchronization signal used for dispatch synchronization.  */
+  uint64_t signal;
+  /* Private segment size.  */
+  uint32_t private_segment_size;
+  /* Group segment size.  */
+  uint32_t group_segment_size;
+  /* Number of children kernel dispatches.  */
+  uint64_t kernel_dispatch_count;
+  /* Debug purpose argument.  */
+  uint64_t debug;
+  /* Levels-var ICV.  */
+  uint64_t omp_level;
+  /* Kernel dispatch structures created for children kernel dispatches.  */
+  struct GOMP_hsa_kernel_dispatch **children_dispatches;
+  /* Number of threads.  */
+  uint32_t omp_num_threads;
+};
+
+/* Part of the libgomp plugin interface.  Return the name of the accelerator,
+   which is "hsa".  */
+
+const char *
+GOMP_OFFLOAD_get_name (void)
+{
+  return "hsa";
+}
+
+/* Part of the libgomp plugin interface.  Return the specific capabilities the
+   HSA accelerator has.  */
+
+unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+  return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
+}
+
+/* Part of the libgomp plugin interface.  Identify as an HSA accelerator.  */
+
+int
+GOMP_OFFLOAD_get_type (void)
+{
+  return OFFLOAD_TARGET_TYPE_HSA;
+}
+
+/* Return the libgomp version number we're compatible with.  There is
+   no requirement for cross-version compatibility.  */
+
+unsigned
+GOMP_OFFLOAD_version (void)
+{
+  return GOMP_VERSION;
+}
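For illustration, a hypothetical dispatch of a one-dimensional kernel with 1024 work-items in work-groups of 64 would be described to this plugin roughly as follows (the helper name and the values are invented for the example):

static void
example_set_attrs (struct GOMP_kernel_launch_attributes *attrs)
{
  attrs->ndim = 1;        /* one-dimensional grid */
  attrs->gdims[0] = 1024; /* total work-items in dimension 0 */
  attrs->wdims[0] = 64;   /* work-group size in dimension 0 */
}

These values correspond to the _griddim_ clauses that the compiler attaches to a gridified target construct.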
+
+/* Flag to decide whether to print information about what is going on to
+   stderr.  Set in init_enviroment_variables depending on environment
+   variables.  */
+
+static bool debug;
+
+/* Flag to decide if the runtime should suppress a possible fallback to host
+   execution.  */
+
+static bool suppress_host_fallback;
+
+/* Initialize debug and suppress_host_fallback according to the environment.  */
+
+static void
+init_enviroment_variables (void)
+{
+  if (getenv ("HSA_DEBUG"))
+    debug = true;
+  else
+    debug = false;
+
+  if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
+    suppress_host_fallback = true;
+  else
+    suppress_host_fallback = false;
+}
+
+/* Print a logging message with PREFIX to stderr if the debug flag is set,
+   i.e. if the HSA_DEBUG environment variable was present.  */
+
+#define HSA_LOG(prefix, ...) \
+  do \
+    { \
+      if (debug) \
+	{ \
+	  fprintf (stderr, prefix); \
+	  fprintf (stderr, __VA_ARGS__); \
+	} \
+    } \
+  while (false)
+
+/* Print a debugging message to stderr.  */
+
+#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
+
+/* Print a warning message to stderr.  */
+
+#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
+
+/* Print HSA warning STR with an HSA STATUS code.  */
+
+static void
+hsa_warn (const char *str, hsa_status_t status)
+{
+  if (!debug)
+    return;
+
+  const char *hsa_error;
+  hsa_status_string (status, &hsa_error);
+
+  fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error);
+}
+
+/* Report a fatal error STR together with the HSA error corresponding to
+   STATUS and terminate execution of the current process.  */
+
+static void
+hsa_fatal (const char *str, hsa_status_t status)
+{
+  const char *hsa_error;
+  hsa_status_string (status, &hsa_error);
+  GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
+		     hsa_error);
+}
+
+struct hsa_kernel_description
+{
+  const char *name;
+  unsigned omp_data_size;
+  bool gridified_kernel_p;
+  unsigned kernel_dependencies_count;
+  const char **kernel_dependencies;
+};
+
+struct global_var_info
+{
+  const char *name;
+  void *address;
+};
+
+/* Data passed by the static initializer of a compilation unit containing BRIG
+   to GOMP_offload_register.  */
+
+struct brig_image_desc
+{
+  hsa_ext_module_t brig_module;
+  const unsigned kernel_count;
+  struct hsa_kernel_description *kernel_infos;
+  const unsigned global_variable_count;
+  struct global_var_info *global_variables;
+};
+
+struct agent_info;
+
+/* Information required to identify, finalize and run any given kernel.  */
+
+struct kernel_info
+{
+  /* Name of the kernel, required to locate it within the BRIG module.  */
+  const char *name;
+  /* Size of memory space for OMP data.  */
+  unsigned omp_data_size;
+  /* The specific agent the kernel has been or will be finalized for and run
+     on.  */
+  struct agent_info *agent;
+  /* The specific module in which the kernel is defined.  */
+  struct module_info *module;
+  /* Mutex enforcing that at most one thread ever initializes a kernel for
+     use.  A thread should have locked agent->modules_rwlock for reading
+     before acquiring it.  */
+  pthread_mutex_t init_mutex;
+  /* Flag indicating whether the kernel has been initialized and all fields
+     below it contain valid data.  */
+  bool initialized;
+  /* Flag indicating that the kernel has a problem that blocks its
+     execution.  */
+  bool initialization_failed;
+  /* The object to be put into the dispatch queue.  */
+  uint64_t object;
+  /* Required size of kernel arguments.  */
+  uint32_t kernarg_segment_size;
+  /* Required size of group segment.  */
+  uint32_t group_segment_size;
+  /* Required size of private segment.  */
+  uint32_t private_segment_size;
+  /* List of all kernel dependencies.  */
+  const char **dependencies;
+  /* Number of dependencies.  */
+  unsigned dependencies_count;
+  /* Maximum OMP data size necessary for kernel from kernel dispatches.  */
+  unsigned max_omp_data_size;
+  /* True if the kernel is gridified.  */
+  bool gridified_kernel_p;
+};
+
+/* Information about a particular BRIG module, its image and kernels.  */
+
+struct module_info
+{
+  /* The next and previous module in the linked list of modules of an
+     agent.  */
+  struct module_info *next, *prev;
+  /* The description with which the program has registered the image.  */
+  struct brig_image_desc *image_desc;
+
+  /* Number of kernels in this module.  */
+  int kernel_count;
+  /* An array of kernel_info structures describing each kernel in this
+     module.  */
+  struct kernel_info kernels[];
+};
+
+/* Information about a shared BRIG library.  */
+
+struct brig_library_info
+{
+  char *file_name;
+  hsa_ext_module_t image;
+};
+
+/* Description of an HSA GPU agent and the program associated with it.  */
+
+struct agent_info
+{
+  /* The HSA ID of the agent.  Assigned when hsa_context is initialized.  */
+  hsa_agent_t id;
+  /* Whether the agent has been initialized.  The fields below are usable only
+     if it has been.  */
+  bool initialized;
+  /* The HSA ISA of this agent.  */
+  hsa_isa_t isa;
+  /* Command queue of the agent.  */
+  hsa_queue_t *command_q;
+  /* Kernel from kernel dispatch command queue.  */
+  hsa_queue_t *kernel_dispatch_command_q;
+  /* The HSA memory region from which to allocate kernel arguments.  */
+  hsa_region_t kernarg_region;
+
+  /* Read-write lock that protects kernels which are running or about to be
+     run from interference with loading and unloading of images.  Needs to be
+     locked for reading while a kernel is being run, and for writing if the
+     list of modules is manipulated (and thus the HSA program invalidated).  */
+  pthread_rwlock_t modules_rwlock;
+  /* The first module in a linked list of modules associated with this
+     agent.  */
+  struct module_info *first_module;
+
+  /* Mutex enforcing that only one thread will finalize the HSA program.  A
+     thread should have locked agent->modules_rwlock for reading before
+     acquiring it.  */
+  pthread_mutex_t prog_mutex;
+  /* Flag whether the HSA program that consists of all the modules has been
+     finalized.  */
+  bool prog_finalized;
+  /* Flag whether the program was finalized but with a failure.  */
+  bool prog_finalized_error;
+  /* HSA executable - the finalized program that is used to locate kernels.  */
+  hsa_executable_t executable;
+  /* List of BRIG libraries.  */
+  struct brig_library_info **brig_libraries;
+  /* Number of loaded shared BRIG libraries.  */
+  unsigned brig_libraries_count;
+};
+
+/* Information about the whole HSA environment and all of its agents.  */
+
+struct hsa_context_info
+{
+  /* Whether the structure has been initialized.  */
+  bool initialized;
+  /* Number of usable GPU HSA agents in the system.  */
+  int agent_count;
+  /* Array of agent_info structures describing the individual HSA agents.  */
+  struct agent_info *agents;
+};
+
+/* Information about the whole HSA environment and all of its agents.  */
+
+static struct hsa_context_info hsa_context;
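The comments above prescribe a locking discipline; in schematic form (a sketch of the intended usage under those rules, not code from the patch, with the helper name invented):

/* Run a kernel while holding the modules rwlock for reading, as the
   agent_info comments above require.  */
static void
run_kernel_locked (struct agent_info *agent, struct kernel_info *kernel)
{
  pthread_rwlock_rdlock (&agent->modules_rwlock);
  if (!kernel->initialized)
    {
      pthread_mutex_lock (&kernel->init_mutex);
      /* One-time kernel initialization; finalizing the program would
	 additionally take agent->prog_mutex.  */
      pthread_mutex_unlock (&kernel->init_mutex);
    }
  /* ... dispatch the kernel here ...  */
  pthread_rwlock_unlock (&agent->modules_rwlock);
}

Loading or unloading an image, by contrast, takes modules_rwlock for writing, which is what invalidates the finalized HSA program.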
*/
+
+static struct kernel_info *
+get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
+{
+  struct module_info *module = agent->first_module;
+
+  while (module)
+    {
+      for (int i = 0; i < module->kernel_count; i++)
+	if (strcmp (module->kernels[i].name, kernel_name) == 0)
+	  return &module->kernels[i];
+
+      module = module->next;
+    }
+
+  return NULL;
+}
+
+/* Return true if the agent is a GPU that can accept concurrent submissions
+   from different threads.  */
+
+static bool
+suitable_hsa_agent_p (hsa_agent_t agent)
+{
+  hsa_device_type_t device_type;
+  hsa_status_t status
+    = hsa_agent_get_info (agent, HSA_AGENT_INFO_DEVICE, &device_type);
+  if (status != HSA_STATUS_SUCCESS || device_type != HSA_DEVICE_TYPE_GPU)
+    return false;
+
+  uint32_t features = 0;
+  status = hsa_agent_get_info (agent, HSA_AGENT_INFO_FEATURE, &features);
+  if (status != HSA_STATUS_SUCCESS
+      || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
+    return false;
+  hsa_queue_type_t queue_type;
+  status = hsa_agent_get_info (agent, HSA_AGENT_INFO_QUEUE_TYPE, &queue_type);
+  if (status != HSA_STATUS_SUCCESS
+      || (queue_type != HSA_QUEUE_TYPE_MULTI))
+    return false;
+
+  return true;
+}
+
+/* Callback of hsa_iterate_agents; if AGENT is a GPU device, increment
+   agent_count in hsa_context.  */
+
+static hsa_status_t
+count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
+{
+  if (suitable_hsa_agent_p (agent))
+    hsa_context.agent_count++;
+  return HSA_STATUS_SUCCESS;
+}
+
+/* Callback of hsa_iterate_agents; if AGENT is a GPU device, assign the agent
+   id to the describing structure in the hsa context.  The index of the
+   structure is pointed to by DATA; increment it afterwards.  */
+
+static hsa_status_t
+assign_agent_ids (hsa_agent_t agent, void *data)
+{
+  if (suitable_hsa_agent_p (agent))
+    {
+      int *agent_index = (int *) data;
+      hsa_context.agents[*agent_index].id = agent;
+      ++*agent_index;
+    }
+  return HSA_STATUS_SUCCESS;
+}
+
+/* Initialize hsa_context if it has not already been done.  */
+
+static void
+init_hsa_context (void)
+{
+  hsa_status_t status;
+  int agent_index = 0;
+
+  if (hsa_context.initialized)
+    return;
+  init_environment_variables ();
+  status = hsa_init ();
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Run-time could not be initialized", status);
+  HSA_DEBUG ("HSA run-time initialized\n");
+  status = hsa_iterate_agents (count_gpu_agents, NULL);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("HSA GPU devices could not be enumerated", status);
+  HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
+
+  hsa_context.agents
+    = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
+				  * sizeof (struct agent_info));
+  status = hsa_iterate_agents (assign_agent_ids, &agent_index);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("HSA GPU devices could not be enumerated", status);
+  if (agent_index != hsa_context.agent_count)
+    GOMP_PLUGIN_fatal ("Failed to assign IDs to all HSA agents");
+  hsa_context.initialized = true;
+}
+
+/* Callback of dispatch queues to report errors.  */
+
+static void
+queue_callback (hsa_status_t status,
+		hsa_queue_t *queue __attribute__ ((unused)),
+		void *data __attribute__ ((unused)))
+{
+  hsa_fatal ("Asynchronous queue error", status);
+}
+
+/* Callback of hsa_agent_iterate_regions.  Determine if a memory REGION can be
+   used for kernarg allocations and, if so, write it to the memory pointed to
+   by DATA and break the query.
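+
+   As an illustration only (this merely mirrors how GOMP_OFFLOAD_init_device
+   below consumes this callback; it is not an additional interface), a caller
+   first marks the result with an invalid handle and tests it afterwards:
+
+     hsa_region_t region;
+     region.handle = (uint64_t) -1;
+     hsa_agent_iterate_regions (agent, get_kernarg_memory_region, &region);
+     if (region.handle == (uint64_t) -1)
+       ... no suitable region was found ...
+
+   Returning HSA_STATUS_INFO_BREAK from the callback stops the iteration as
+   soon as a suitable region has been found.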
*/ + +static hsa_status_t +get_kernarg_memory_region (hsa_region_t region, void *data) +{ + hsa_status_t status; + hsa_region_segment_t segment; + + status = hsa_region_get_info (region, HSA_REGION_INFO_SEGMENT, &segment); + if (status != HSA_STATUS_SUCCESS) + return status; + if (segment != HSA_REGION_SEGMENT_GLOBAL) + return HSA_STATUS_SUCCESS; + + uint32_t flags; + status = hsa_region_get_info (region, HSA_REGION_INFO_GLOBAL_FLAGS, &flags); + if (status != HSA_STATUS_SUCCESS) + return status; + if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) + { + hsa_region_t *ret = (hsa_region_t *) data; + *ret = region; + return HSA_STATUS_INFO_BREAK; + } + return HSA_STATUS_SUCCESS; +} + +/* Part of the libgomp plugin interface. Return the number of HSA devices on + the system. */ + +int +GOMP_OFFLOAD_get_num_devices (void) +{ + init_hsa_context (); + return hsa_context.agent_count; +} + +/* Part of the libgomp plugin interface. Initialize agent number N so that it + can be used for computation. */ + +void +GOMP_OFFLOAD_init_device (int n) +{ + init_hsa_context (); + if (n >= hsa_context.agent_count) + GOMP_PLUGIN_fatal ("Request to initialize non-existing HSA device %i", n); + struct agent_info *agent = &hsa_context.agents[n]; + + if (agent->initialized) + return; + + if (pthread_rwlock_init (&agent->modules_rwlock, NULL)) + GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent rwlock"); + if (pthread_mutex_init (&agent->prog_mutex, NULL)) + GOMP_PLUGIN_fatal ("Failed to initialize an HSA agent program mutex"); + + uint32_t queue_size; + hsa_status_t status; + status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &queue_size); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error requesting maximum queue size of the HSA agent", status); + status = hsa_agent_get_info (agent->id, HSA_AGENT_INFO_ISA, &agent->isa); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error querying the ISA of the agent", status); + status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI, + queue_callback, NULL, UINT32_MAX, UINT32_MAX, + &agent->command_q); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error creating command queue", status); + + status = hsa_queue_create (agent->id, queue_size, HSA_QUEUE_TYPE_MULTI, + queue_callback, NULL, UINT32_MAX, UINT32_MAX, + &agent->kernel_dispatch_command_q); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error creating kernel dispatch command queue", status); + + agent->kernarg_region.handle = (uint64_t) -1; + status = hsa_agent_iterate_regions (agent->id, get_kernarg_memory_region, + &agent->kernarg_region); + if (agent->kernarg_region.handle == (uint64_t) -1) + GOMP_PLUGIN_fatal ("Could not find suitable memory region for kernel " + "arguments"); + HSA_DEBUG ("HSA agent initialized, queue has id %llu\n", + (long long unsigned) agent->command_q->id); + HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n", + (long long unsigned) agent->kernel_dispatch_command_q->id); + agent->initialized = true; +} + +/* Verify that hsa_context has already been initialized and return the + agent_info structure describing device number N. 
*/
+
+static struct agent_info *
+get_agent_info (int n)
+{
+  if (!hsa_context.initialized)
+    GOMP_PLUGIN_fatal ("Attempt to use uninitialized HSA context.");
+  if (n >= hsa_context.agent_count)
+    GOMP_PLUGIN_fatal ("Request to operate on a non-existing HSA device %i",
+		       n);
+  if (!hsa_context.agents[n].initialized)
+    GOMP_PLUGIN_fatal ("Attempt to use an uninitialized HSA agent.");
+  return &hsa_context.agents[n];
+}
+
+/* Insert MODULE into the linked list of modules of AGENT.  */
+
+static void
+add_module_to_agent (struct agent_info *agent, struct module_info *module)
+{
+  if (agent->first_module)
+    agent->first_module->prev = module;
+  module->next = agent->first_module;
+  module->prev = NULL;
+  agent->first_module = module;
+}
+
+/* Remove MODULE from the linked list of modules of AGENT.  */
+
+static void
+remove_module_from_agent (struct agent_info *agent, struct module_info *module)
+{
+  if (agent->first_module == module)
+    agent->first_module = module->next;
+  if (module->prev)
+    module->prev->next = module->next;
+  if (module->next)
+    module->next->prev = module->prev;
+}
+
+/* Free the HSA program in AGENT and everything associated with it and set
+   agent->prog_finalized and the initialized flags of all kernels to false.  */
+
+static void
+destroy_hsa_program (struct agent_info *agent)
+{
+  if (!agent->prog_finalized || agent->prog_finalized_error)
+    return;
+
+  hsa_status_t status;
+
+  HSA_DEBUG ("Destroying the current HSA program.\n");
+
+  status = hsa_executable_destroy (agent->executable);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not destroy HSA executable", status);
+
+  struct module_info *module;
+  for (module = agent->first_module; module; module = module->next)
+    {
+      int i;
+      for (i = 0; i < module->kernel_count; i++)
+	module->kernels[i].initialized = false;
+    }
+  agent->prog_finalized = false;
+}
+
+/* Part of the libgomp plugin interface.  Load BRIG module described by struct
+   brig_image_desc in TARGET_DATA and return references to kernel descriptors
+   in TARGET_TABLE.  */
+
+int
+GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data,
+			 struct addr_pair **target_table)
+{
+  if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
+    GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin"
+		       " (expected %u, received %u)",
+		       GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
+
+  struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
+  struct agent_info *agent;
+  struct addr_pair *pair;
+  struct module_info *module;
+  struct kernel_info *kernel;
+  int kernel_count = image_desc->kernel_count;
+
+  agent = get_agent_info (ord);
+  if (pthread_rwlock_wrlock (&agent->modules_rwlock))
+    GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock");
+  if (agent->prog_finalized)
+    destroy_hsa_program (agent);
+
+  HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
+  pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
+  *target_table = pair;
+  module = (struct module_info *)
+    GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
+				+ kernel_count * sizeof (struct kernel_info));
+  module->image_desc = image_desc;
+  module->kernel_count = kernel_count;
+
+  kernel = &module->kernels[0];
+
+  /* Populate the kernel_info structures and the addr_pair table.  Each entry
+     of TARGET_TABLE associates the address range of a kernel_info structure
+     with its kernel, so that libgomp can later pass that same pointer back as
+     FN_PTR to GOMP_OFFLOAD_can_run and GOMP_OFFLOAD_run.
*/
+  for (int i = 0; i < kernel_count; i++)
+    {
+      pair->start = (uintptr_t) kernel;
+      pair->end = (uintptr_t) (kernel + 1);
+
+      struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
+      kernel->agent = agent;
+      kernel->module = module;
+      kernel->name = d->name;
+      kernel->omp_data_size = d->omp_data_size;
+      kernel->gridified_kernel_p = d->gridified_kernel_p;
+      kernel->dependencies_count = d->kernel_dependencies_count;
+      kernel->dependencies = d->kernel_dependencies;
+      if (pthread_mutex_init (&kernel->init_mutex, NULL))
+	GOMP_PLUGIN_fatal ("Failed to initialize an HSA kernel mutex");
+
+      kernel++;
+      pair++;
+    }
+
+  add_module_to_agent (agent, module);
+  if (pthread_rwlock_unlock (&agent->modules_rwlock))
+    GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+  return kernel_count;
+}
+
+/* Add a shared BRIG library from a FILE_NAME to an AGENT.  */
+
+static struct brig_library_info *
+add_shared_library (const char *file_name, struct agent_info *agent)
+{
+  struct brig_library_info *library = NULL;
+
+  void *f = dlopen (file_name, RTLD_NOW);
+  if (f == NULL)
+    return NULL;
+
+  void *start = dlsym (f, "__brig_start");
+  void *end = dlsym (f, "__brig_end");
+
+  if (start == NULL || end == NULL)
+    return NULL;
+
+  unsigned size = (char *) end - (char *) start;
+  char *buf = (char *) GOMP_PLUGIN_malloc (size);
+  memcpy (buf, start, size);
+
+  library = GOMP_PLUGIN_malloc (sizeof (struct brig_library_info));
+  library->file_name = (char *) GOMP_PLUGIN_malloc (strlen (file_name) + 1);
+  strcpy (library->file_name, file_name);
+  library->image = (hsa_ext_module_t) buf;
+
+  return library;
+}
+
+/* Release memory used for BRIG shared libraries that correspond
+   to an AGENT.  */
+
+static void
+release_agent_shared_libraries (struct agent_info *agent)
+{
+  for (unsigned i = 0; i < agent->brig_libraries_count; i++)
+    if (agent->brig_libraries[i])
+      {
+	free (agent->brig_libraries[i]->file_name);
+	free (agent->brig_libraries[i]->image);
+	free (agent->brig_libraries[i]);
+      }
+
+  free (agent->brig_libraries);
+}
+
+/* Create and finalize the program consisting of all loaded modules.  */
+
+static void
+create_and_finalize_hsa_program (struct agent_info *agent)
+{
+  hsa_status_t status;
+  hsa_ext_program_t prog_handle;
+
+  if (pthread_mutex_lock (&agent->prog_mutex))
+    GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
+  if (agent->prog_finalized)
+    goto final;
+
+  status = hsa_ext_program_create (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
+				   HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
+				   NULL, &prog_handle);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not create an HSA program", status);
+
+  HSA_DEBUG ("Created an HSA program\n");
+
+  struct module_info *module = agent->first_module;
+  while (module)
+    {
+      status = hsa_ext_program_add_module (prog_handle,
+					   module->image_desc->brig_module);
+      if (status != HSA_STATUS_SUCCESS)
+	hsa_fatal ("Could not add a module to the HSA program", status);
+      module = module->next;
+    }
+
+  /* Load all shared libraries.
*/
+  const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
+  const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
+
+  agent->brig_libraries_count = libraries_count;
+  agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
+    (sizeof (struct brig_library_info *) * libraries_count);
+
+  for (unsigned i = 0; i < libraries_count; i++)
+    {
+      struct brig_library_info *library = add_shared_library (libraries[i],
+							       agent);
+      if (library == NULL)
+	{
+	  HSA_WARNING ("Could not open a shared BRIG library: %s\n",
+		       libraries[i]);
+	  continue;
+	}
+      agent->brig_libraries[i] = library;
+
+      status = hsa_ext_program_add_module (prog_handle, library->image);
+      if (status != HSA_STATUS_SUCCESS)
+	hsa_warn ("Could not add a shared BRIG library to the HSA program",
+		  status);
+      else
+	HSA_DEBUG ("A shared BRIG library has been added to the program: %s\n",
+		   libraries[i]);
+    }
+
+  hsa_ext_control_directives_t control_directives;
+  memset (&control_directives, 0, sizeof (control_directives));
+  hsa_code_object_t code_object;
+  status = hsa_ext_program_finalize (prog_handle, agent->isa,
+				     HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
+				     control_directives, "",
+				     HSA_CODE_OBJECT_TYPE_PROGRAM,
+				     &code_object);
+  if (status != HSA_STATUS_SUCCESS)
+    {
+      hsa_warn ("Finalization of the HSA program failed", status);
+      goto failure;
+    }
+
+  HSA_DEBUG ("Finalization done\n");
+  hsa_ext_program_destroy (prog_handle);
+
+  status
+    = hsa_executable_create (HSA_PROFILE_FULL, HSA_EXECUTABLE_STATE_UNFROZEN,
+			     "", &agent->executable);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not create HSA executable", status);
+
+  module = agent->first_module;
+  while (module)
+    {
+      /* Initialize all global variables declared in the module.  */
+      for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
+	{
+	  struct global_var_info *var;
+	  var = &module->image_desc->global_variables[i];
+	  HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
+		     var->address);
+
+	  status
+	    = hsa_executable_global_variable_define (agent->executable,
+						     var->name, var->address);
+	  if (status != HSA_STATUS_SUCCESS)
+	    hsa_fatal ("Could not define a global variable in the HSA program",
+		       status);
+	}
+
+      module = module->next;
+    }
+
+  status = hsa_executable_load_code_object (agent->executable, agent->id,
+					    code_object, "");
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not add a code object to the HSA executable", status);
+  status = hsa_executable_freeze (agent->executable, "");
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not freeze the HSA executable", status);
+
+  HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
+
+  /* If everything went well, jump to final.  */
+  goto final;
+
+failure:
+  agent->prog_finalized_error = true;
+
+final:
+  agent->prog_finalized = true;
+
+  if (pthread_mutex_unlock (&agent->prog_mutex))
+    GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
+}
+
+/* Create kernel dispatch data structure for given KERNEL.  */
+
+static struct GOMP_hsa_kernel_dispatch *
+create_single_kernel_dispatch (struct kernel_info *kernel,
+			       unsigned omp_data_size)
+{
+  struct agent_info *agent = kernel->agent;
+  struct GOMP_hsa_kernel_dispatch *shadow
+    = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
+
+  shadow->queue = agent->command_q;
+  shadow->omp_data_memory
+    = omp_data_size > 0
+      ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
+  unsigned dispatch_count = kernel->dependencies_count;
+  shadow->kernel_dispatch_count = dispatch_count;
+
+  shadow->children_dispatches
+    = GOMP_PLUGIN_malloc (dispatch_count
+			  * sizeof (struct GOMP_hsa_kernel_dispatch *));
+
+  shadow->object = kernel->object;
+
+  hsa_signal_t sync_signal;
+  hsa_status_t status = hsa_signal_create (1, 0, NULL, &sync_signal);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Error creating the HSA sync signal", status);
+
+  shadow->signal = sync_signal.handle;
+  shadow->private_segment_size = kernel->private_segment_size;
+  shadow->group_segment_size = kernel->group_segment_size;
+
+  status
+    = hsa_memory_allocate (agent->kernarg_region, kernel->kernarg_segment_size,
+			   &shadow->kernarg_address);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
+
+  return shadow;
+}
+
+/* Release the data structures created for the kernel dispatch in the SHADOW
+   argument.  */
+
+static void
+release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
+{
+  HSA_DEBUG ("Released kernel dispatch: %p has value: %lu (%p)\n", shadow,
+	     shadow->debug, (void *) shadow->debug);
+
+  hsa_memory_free (shadow->kernarg_address);
+
+  hsa_signal_t s;
+  s.handle = shadow->signal;
+  hsa_signal_destroy (s);
+
+  free (shadow->omp_data_memory);
+
+  for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
+    release_kernel_dispatch (shadow->children_dispatches[i]);
+
+  free (shadow->children_dispatches);
+  free (shadow);
+}
+
+/* Initialize a KERNEL and, one level deep, the kernels it depends on.
+   MAX_OMP_DATA_SIZE is used to calculate the maximum necessary memory for
+   OMP data allocation.  */
+
+static void
+init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
+{
+  hsa_status_t status;
+  struct agent_info *agent = kernel->agent;
+  hsa_executable_symbol_t kernel_symbol;
+  status = hsa_executable_get_symbol (agent->executable, NULL, kernel->name,
+				      agent->id, 0, &kernel_symbol);
+  if (status != HSA_STATUS_SUCCESS)
+    {
+      hsa_warn ("Could not find symbol for kernel in the code object", status);
+      goto failure;
+    }
+  HSA_DEBUG ("Located kernel %s\n", kernel->name);
+  status
+    = hsa_executable_symbol_get_info (kernel_symbol,
+				      HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT,
+				      &kernel->object);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not extract a kernel object from its symbol", status);
+  status = hsa_executable_symbol_get_info
+    (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
+     &kernel->kernarg_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not get info about kernel argument size", status);
+  status = hsa_executable_symbol_get_info
+    (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
+     &kernel->group_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not get info about kernel group segment size", status);
+  status = hsa_executable_symbol_get_info
+    (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
+     &kernel->private_segment_size);
+  if (status != HSA_STATUS_SUCCESS)
+    hsa_fatal ("Could not get info about kernel private segment size",
+	       status);
+
+  HSA_DEBUG ("Kernel structure for %s fully initialized with "
+	     "following segment sizes:\n", kernel->name);
+  HSA_DEBUG ("  group_segment_size: %u\n",
+	     (unsigned) kernel->group_segment_size);
+  HSA_DEBUG ("  private_segment_size: %u\n",
+	     (unsigned) kernel->private_segment_size);
+  HSA_DEBUG ("  kernarg_segment_size: %u\n",
+	     (unsigned) kernel->kernarg_segment_size);
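+  /* The three segment sizes queried above feed directly into dispatch:
+     private_segment_size and group_segment_size are copied into the AQL
+     packet in GOMP_OFFLOAD_run, and kernarg_segment_size determines how much
+     kernarg memory create_single_kernel_dispatch allocates.  */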
+  HSA_DEBUG ("  omp_data_size: %u\n", kernel->omp_data_size);
+  HSA_DEBUG ("  gridified_kernel_p: %u\n",
+	     (unsigned) kernel->gridified_kernel_p);
+
+  if (kernel->omp_data_size > *max_omp_data_size)
+    *max_omp_data_size = kernel->omp_data_size;
+
+  for (unsigned i = 0; i < kernel->dependencies_count; i++)
+    {
+      struct kernel_info *dependency
+	= get_kernel_for_agent (agent, kernel->dependencies[i]);
+
+      if (dependency == NULL)
+	{
+	  HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
+		     "dependency name: %s\n", kernel->name,
+		     kernel->dependencies[i]);
+	  goto failure;
+	}
+
+      if (dependency->dependencies_count > 0)
+	{
+	  HSA_DEBUG ("HSA does not allow kernel dispatching code with "
+		     "a depth greater than one\n");
+	  goto failure;
+	}
+
+      init_single_kernel (dependency, max_omp_data_size);
+    }
+
+  return;
+
+failure:
+  kernel->initialization_failed = true;
+}
+
+/* Indent stream F by INDENT spaces.  */
+
+static void
+indent_stream (FILE *f, unsigned indent)
+{
+  fprintf (f, "%*s", indent, "");
+}
+
+/* Dump kernel DISPATCH data structure and indent it by INDENT spaces.  */
+
+static void
+print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch,
+		       unsigned indent)
+{
+  indent_stream (stderr, indent);
+  fprintf (stderr, "this: %p\n", dispatch);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "queue: %p\n", dispatch->queue);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "object: %lu\n", dispatch->object);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "signal: %lu\n", dispatch->signal);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "private_segment_size: %u\n",
+	   dispatch->private_segment_size);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "group_segment_size: %u\n",
+	   dispatch->group_segment_size);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "children dispatches: %lu\n",
+	   dispatch->kernel_dispatch_count);
+  indent_stream (stderr, indent);
+  fprintf (stderr, "omp_num_threads: %u\n",
+	   dispatch->omp_num_threads);
+  fprintf (stderr, "\n");
+
+  for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
+    print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
+}
+
+/* Create kernel dispatch data structure for a KERNEL and all its
+   dependencies.  */
+
+static struct GOMP_hsa_kernel_dispatch *
+create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
+{
+  struct GOMP_hsa_kernel_dispatch *shadow
+    = create_single_kernel_dispatch (kernel, omp_data_size);
+  shadow->omp_num_threads = 64;
+  shadow->debug = 0;
+  shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
+
+  /* Create kernel dispatch data structures.  We do not allow a kernel
+     dispatch nesting depth greater than one.  */
+  for (unsigned i = 0; i < kernel->dependencies_count; i++)
+    {
+      struct kernel_info *dependency
+	= get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
+      shadow->children_dispatches[i]
+	= create_single_kernel_dispatch (dependency, omp_data_size);
+      shadow->children_dispatches[i]->queue
+	= kernel->agent->kernel_dispatch_command_q;
+      shadow->children_dispatches[i]->omp_level = 1;
+    }
+
+  return shadow;
+}
+
+/* Do all the work that is necessary before running KERNEL for the first time.
+   The function assumes the program has been created, finalized and frozen by
+   create_and_finalize_hsa_program.
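+
+   For illustration, the expected calling sequence is the one used by
+   GOMP_OFFLOAD_can_run below, with agent->modules_rwlock held for reading
+   around any subsequent kernel execution:
+
+     create_and_finalize_hsa_program (agent);
+     if (!agent->prog_finalized_error)
+       init_kernel (kernel);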
*/
+
+static void
+init_kernel (struct kernel_info *kernel)
+{
+  if (pthread_mutex_lock (&kernel->init_mutex))
+    GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
+  if (kernel->initialized)
+    {
+      if (pthread_mutex_unlock (&kernel->init_mutex))
+	GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+			   "mutex");
+
+      return;
+    }
+
+  /* Precompute the maximum size of OMP data necessary for the kernel, and
+     any kernels it dispatches, from the kernel dispatch operation.  */
+  init_single_kernel (kernel, &kernel->max_omp_data_size);
+
+  if (!kernel->initialization_failed)
+    HSA_DEBUG ("\n");
+
+  kernel->initialized = true;
+  if (pthread_mutex_unlock (&kernel->init_mutex))
+    GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
+		       "mutex");
+}
+
+/* Parse the target attributes INPUT provided by the compiler and return true
+   if we should run anything at all.  If INPUT does not contain any
+   HSA-specific attributes, fill DEF with default values and store a pointer
+   to it in *RESULT; otherwise store a pointer to the attributes found.  */
+
+static bool
+parse_target_attributes (void **input,
+			 struct GOMP_kernel_launch_attributes *def,
+			 struct GOMP_kernel_launch_attributes **result)
+{
+  if (!input)
+    GOMP_PLUGIN_fatal ("No target arguments provided");
+
+  bool attrs_found = false;
+  while (*input)
+    {
+      uintptr_t id = (uintptr_t) *input;
+      if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
+	  && ((id & GOMP_TARGET_ARG_ID_MASK)
+	      == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
+	{
+	  input++;
+	  attrs_found = true;
+	  break;
+	}
+
+      if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
+	input++;
+      input++;
+    }
+
+  if (!attrs_found)
+    {
+      def->ndim = 1;
+      def->gdims[0] = 1;
+      def->gdims[1] = 1;
+      def->gdims[2] = 1;
+      def->wdims[0] = 1;
+      def->wdims[1] = 1;
+      def->wdims[2] = 1;
+      *result = def;
+      HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
+      return true;
+    }
+
+  struct GOMP_kernel_launch_attributes *kla;
+  kla = (struct GOMP_kernel_launch_attributes *) *input;
+  *result = kla;
+  if (kla->ndim != 1)
+    GOMP_PLUGIN_fatal ("HSA does not yet support number of dimensions "
+		       "different from one.");
+  if (kla->gdims[0] == 0)
+    return false;
+
+  HSA_DEBUG ("GOMP_OFFLOAD_run called with grid size %u and group size %u\n",
+	     kla->gdims[0], kla->wdims[0]);
+
+  return true;
+}
+
+/* Return true if the HSA runtime can run function FN_PTR.  */
+
+bool
+GOMP_OFFLOAD_can_run (void *fn_ptr)
+{
+  struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+  struct agent_info *agent = kernel->agent;
+  create_and_finalize_hsa_program (agent);
+
+  if (agent->prog_finalized_error)
+    goto failure;
+
+  init_kernel (kernel);
+  if (kernel->initialization_failed)
+    goto failure;
+
+  return true;
+
+failure:
+  if (suppress_host_fallback)
+    GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
+  HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
+  return false;
+}
+
+/* Part of the libgomp plugin interface.  Run a kernel on device N and pass it
+   an array of pointers in VARS as a parameter.  The kernel is identified by
+   FN_PTR which must point to a kernel_info structure.
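+
+   As an illustration only (the exact bit layout of the identifier element is
+   defined by the GOMP_TARGET_ARG_* constants in gomp-constants.h, not here),
+   an ARGS array carrying the HSA launch attributes recognized by
+   parse_target_attributes above could be built like this:
+
+     struct GOMP_kernel_launch_attributes kla;
+     kla.ndim = 1;
+     kla.gdims[0] = 1024;	(total grid size of 1024 work-items)
+     kla.wdims[0] = 64;		(work-group size of 64 work-items)
+     void *args[]
+       = { (void *) (uintptr_t) (GOMP_DEVICE_HSA
+				 | GOMP_TARGET_ARG_SUBSEQUENT_PARAM
+				 | GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES),
+	   (void *) &kla, NULL };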
*/
+
+void
+GOMP_OFFLOAD_run (int n, void *fn_ptr, void *vars, void **args)
+{
+  struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
+  struct agent_info *agent = kernel->agent;
+  struct GOMP_kernel_launch_attributes def;
+  struct GOMP_kernel_launch_attributes *kla;
+  if (!parse_target_attributes (args, &def, &kla))
+    {
+      HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
+      return;
+    }
+  if (pthread_rwlock_rdlock (&agent->modules_rwlock))
+    GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
+
+  if (!agent->initialized)
+    GOMP_PLUGIN_fatal ("Agent must be initialized");
+
+  if (!kernel->initialized)
+    GOMP_PLUGIN_fatal ("Called kernel must be initialized");
+
+  struct GOMP_hsa_kernel_dispatch *shadow
+    = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
+
+  if (debug)
+    {
+      fprintf (stderr, "\nKernel has following dependencies:\n");
+      print_kernel_dispatch (shadow, 2);
+    }
+
+  uint64_t index = hsa_queue_add_write_index_release (agent->command_q, 1);
+  HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
+
+  /* Wait until the queue is not full before writing the packet.  */
+  while (index - hsa_queue_load_read_index_acquire (agent->command_q)
+	 >= agent->command_q->size)
+    ;
+
+  hsa_kernel_dispatch_packet_t *packet;
+  packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
+	   + index % agent->command_q->size;
+
+  memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
+  packet->setup |= (uint16_t) 1 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS;
+  packet->grid_size_x = kla->gdims[0];
+  uint32_t wgs = kla->wdims[0];
+  if (wgs == 0)
+    /* TODO: Provide a default via environment.  */
+    wgs = 64;
+  else if (wgs > kla->gdims[0])
+    wgs = kla->gdims[0];
+  packet->workgroup_size_x = wgs;
+  packet->grid_size_y = 1;
+  packet->workgroup_size_y = 1;
+  packet->grid_size_z = 1;
+  packet->workgroup_size_z = 1;
+  packet->private_segment_size = kernel->private_segment_size;
+  packet->group_segment_size = kernel->group_segment_size;
+  packet->kernel_object = kernel->object;
+  packet->kernarg_address = shadow->kernarg_address;
+  hsa_signal_t s;
+  s.handle = shadow->signal;
+  packet->completion_signal = s;
+  hsa_signal_store_relaxed (s, 1);
+  memcpy (shadow->kernarg_address, &vars, sizeof (vars));
+
+  HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
+  memcpy ((char *) shadow->kernarg_address + sizeof (vars), &shadow,
+	  sizeof (struct hsa_kernel_runtime *));
+
+  uint16_t header;
+  header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+
+  HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
+
+  __atomic_store_n ((uint16_t *) (&packet->header), header, __ATOMIC_RELEASE);
+  hsa_signal_store_release (agent->command_q->doorbell_signal, index);
+
+  /* TODO: GPU agents in Carrizo APUs cannot properly update the L2 cache for
+     signal wait and signal load operations on their own, so we need to
+     periodically call hsa_signal_load_acquire on the completion signals of
+     child kernels on the CPU to make that happen.  As soon as the limitation
+     is resolved, this workaround can be removed.  */
+
+  HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
+
+  /* The root signal is waited on with a 1ms timeout.
*/
+  while (hsa_signal_wait_acquire (s, HSA_SIGNAL_CONDITION_LT, 1, 1000 * 1000,
+				  HSA_WAIT_STATE_BLOCKED) != 0)
+    for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
+      {
+	hsa_signal_t child_s;
+	child_s.handle = shadow->children_dispatches[i]->signal;
+
+	HSA_DEBUG ("Waiting for child completion signal: %lu\n",
+		   shadow->children_dispatches[i]->signal);
+	hsa_signal_load_acquire (child_s);
+      }
+
+  release_kernel_dispatch (shadow);
+
+  if (pthread_rwlock_unlock (&agent->modules_rwlock))
+    GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
+}
+
+/* Information to be passed to a thread running a kernel asynchronously.  */
+
+struct async_run_info
+{
+  int device;
+  void *tgt_fn;
+  void *tgt_vars;
+  void **args;
+  void *async_data;
+};
+
+/* Thread routine to run a kernel asynchronously.  */
+
+static void *
+run_kernel_asynchronously (void *thread_arg)
+{
+  struct async_run_info *info = (struct async_run_info *) thread_arg;
+  int device = info->device;
+  void *tgt_fn = info->tgt_fn;
+  void *tgt_vars = info->tgt_vars;
+  void **args = info->args;
+  void *async_data = info->async_data;
+
+  free (info);
+  GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
+  GOMP_PLUGIN_target_task_completion (async_data);
+  return NULL;
+}
+
+/* Part of the libgomp plugin interface.  Run a kernel like GOMP_OFFLOAD_run
+   does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
+   has finished.  */
+
+void
+GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
+			void **args, void *async_data)
+{
+  pthread_t pt;
+  struct async_run_info *info;
+  HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
+  info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
+
+  info->device = device;
+  info->tgt_fn = tgt_fn;
+  info->tgt_vars = tgt_vars;
+  info->args = args;
+  info->async_data = async_data;
+
+  int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
+		       strerror (err));
+  err = pthread_detach (pt);
+  if (err != 0)
+    GOMP_PLUGIN_fatal ("Failed to detach a thread to run an HSA kernel "
+		       "asynchronously: %s", strerror (err));
+}
+
+/* Deinitialize all information associated with MODULE and kernels within
+   it.  */
+
+static void
+destroy_module (struct module_info *module)
+{
+  int i;
+  for (i = 0; i < module->kernel_count; i++)
+    if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
+      GOMP_PLUGIN_fatal ("Failed to destroy an HSA kernel initialization "
+			 "mutex");
+}
+
+/* Part of the libgomp plugin interface.  Unload BRIG module described by
+   struct brig_image_desc in TARGET_DATA from agent number N.
*/ + +void +GOMP_OFFLOAD_unload_image (int n, unsigned version, void *target_data) +{ + if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA) + GOMP_PLUGIN_fatal ("Offload data incompatible with HSA plugin" + " (expected %u, received %u)", + GOMP_VERSION_HSA, GOMP_VERSION_DEV (version)); + + struct agent_info *agent; + agent = get_agent_info (n); + if (pthread_rwlock_wrlock (&agent->modules_rwlock)) + GOMP_PLUGIN_fatal ("Unable to write-lock an HSA agent rwlock"); + + struct module_info *module = agent->first_module; + while (module) + { + if (module->image_desc == target_data) + break; + module = module->next; + } + if (!module) + GOMP_PLUGIN_fatal ("Attempt to unload an image that has never been " + "loaded before"); + + remove_module_from_agent (agent, module); + destroy_module (module); + free (module); + destroy_hsa_program (agent); + if (pthread_rwlock_unlock (&agent->modules_rwlock)) + GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock"); +} + +/* Part of the libgomp plugin interface. Deinitialize all information and + status associated with agent number N. We do not attempt any + synchronization, assuming the user and libgomp will not attempt + deinitialization of a device that is in any way being used at the same + time. */ + +void +GOMP_OFFLOAD_fini_device (int n) +{ + struct agent_info *agent = get_agent_info (n); + if (!agent->initialized) + return; + + struct module_info *next_module = agent->first_module; + while (next_module) + { + struct module_info *module = next_module; + next_module = module->next; + destroy_module (module); + free (module); + } + agent->first_module = NULL; + destroy_hsa_program (agent); + + release_agent_shared_libraries (agent); + + hsa_status_t status = hsa_queue_destroy (agent->command_q); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error destroying command queue", status); + status = hsa_queue_destroy (agent->kernel_dispatch_command_q); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Error destroying kernel dispatch command queue", status); + if (pthread_mutex_destroy (&agent->prog_mutex)) + GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent program mutex"); + if (pthread_rwlock_destroy (&agent->modules_rwlock)) + GOMP_PLUGIN_fatal ("Failed to destroy an HSA agent rwlock"); + agent->initialized = false; +} + +/* Part of the libgomp plugin interface. Not implemented as it is not required + for HSA. */ + +void * +GOMP_OFFLOAD_alloc (int ord, size_t size) +{ + GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_alloc is not implemented because " + "it should never be called"); +} + +/* Part of the libgomp plugin interface. Not implemented as it is not required + for HSA. */ + +void +GOMP_OFFLOAD_free (int ord, void *ptr) +{ + GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_free is not implemented because " + "it should never be called"); +} + +/* Part of the libgomp plugin interface. Not implemented as it is not required + for HSA. */ + +void * +GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) +{ + GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2host is not implemented because " + "it should never be called"); +} + +/* Part of the libgomp plugin interface. Not implemented as it is not required + for HSA. */ + +void * +GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n) +{ + GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_host2dev is not implemented because " + "it should never be called"); +} + +/* Part of the libgomp plugin interface. Not implemented as it is not required + for HSA. 
*/
+
+void *
+GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
+{
+  GOMP_PLUGIN_fatal ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
+		     "it should never be called");
+}
diff --git a/libgomp/target.c b/libgomp/target.c
index bea5822..f1f5849 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -1329,44 +1329,90 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs)
   *thr = old_thr;
 }
 
-/* Host fallback with firstprivate map-type handling.  */
+/* Calculate alignment and size requirements of a private copy of data shared
+   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
 
-static void
-gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
-				   void **hostaddrs, size_t *sizes,
-				   unsigned short *kinds)
+static inline void
+calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
+				     unsigned short *kinds, size_t *tgt_align,
+				     size_t *tgt_size)
 {
-  size_t i, tgt_align = 0, tgt_size = 0;
-  char *tgt = NULL;
+  size_t i;
+  for (i = 0; i < mapnum; i++)
+    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
+      {
+	size_t align = (size_t) 1 << (kinds[i] >> 8);
+	if (*tgt_align < align)
+	  *tgt_align = align;
+	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
+	*tgt_size += sizes[i];
+      }
+}
+
+/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT.  */
+
+static inline void
+copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
+			size_t *sizes, unsigned short *kinds, size_t tgt_align,
+			size_t tgt_size)
+{
+  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
+  if (al)
+    tgt += tgt_align - al;
+  tgt_size = 0;
+  size_t i;
   for (i = 0; i < mapnum; i++)
     if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
       {
	size_t align = (size_t) 1 << (kinds[i] >> 8);
-	if (tgt_align < align)
-	  tgt_align = align;
	tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	tgt_size += sizes[i];
+	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
+	hostaddrs[i] = tgt + tgt_size;
+	tgt_size = tgt_size + sizes[i];
       }
+}
+
+/* Host fallback with firstprivate map-type handling.  */
+
+static void
+gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum,
+				   void **hostaddrs, size_t *sizes,
+				   unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+				       &tgt_size);
   if (tgt_align)
     {
-      tgt = gomp_alloca (tgt_size + tgt_align - 1);
-      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
-      if (al)
-	tgt += tgt_align - al;
-      tgt_size = 0;
-      for (i = 0; i < mapnum; i++)
-	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
-	  {
-	    size_t align = (size_t) 1 << (kinds[i] >> 8);
-	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
-	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
-	    hostaddrs[i] = tgt + tgt_size;
-	    tgt_size = tgt_size + sizes[i];
-	  }
+      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+			      tgt_size);
     }
   gomp_target_fallback (fn, hostaddrs);
 }
 
+/* Handle firstprivate map-type for shared memory devices and the host
+   fallback.  Return a pointer to the firstprivate copies, which has to be
+   freed after use.
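+
+   The alignment arithmetic used by calculate_firstprivate_requirements
+   rounds the running size up to the next multiple of ALIGN, a power of two,
+   before appending each datum.  A worked example (illustration only):
+
+     size_t tgt_size = 13;
+     size_t align = 8;
+     tgt_size = (tgt_size + align - 1) & ~(align - 1);	(tgt_size is now 16)
+     tgt_size += sizes[i];				(datum appended)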
*/
+
+static void *
+gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs,
+				  size_t *sizes, unsigned short *kinds)
+{
+  size_t tgt_align = 0, tgt_size = 0;
+  char *tgt = NULL;
+
+  calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align,
+				       &tgt_size);
+  if (tgt_align)
+    {
+      tgt = gomp_malloc (tgt_size + tgt_align - 1);
+      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align,
+			      tgt_size);
+    }
+  return tgt;
+}
+
 /* Helper function of GOMP_target{,_ext} routines.  */
 
 static void *
@@ -1390,7 +1436,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
       splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
       gomp_mutex_unlock (&devicep->lock);
       if (tgt_fn == NULL)
-	gomp_fatal ("Target function wasn't mapped");
+	{
+	  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+	    return NULL;
+	  else
+	    gomp_fatal ("Target function wasn't mapped");
+	}
 
       return (void *) tgt_fn->tgt_offset;
     }
@@ -1416,13 +1467,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
   void *fn_addr;
   if (devicep == NULL
       || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+      /* All shared memory devices should use the GOMP_target_ext function.  */
+      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
       || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
     return gomp_target_fallback (fn, hostaddrs);
 
   struct target_mem_desc *tgt_vars
     = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     GOMP_MAP_VARS_TARGET);
-  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
+  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
+		     NULL);
   gomp_unmap_vars (tgt_vars, true);
 }
 
@@ -1430,6 +1484,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused,
    and several arguments have been added:
    FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
    DEPEND is array of dependencies, see GOMP_task for details.
+
+   ARGS is a pointer to an array consisting of a variable number of both
+   device-independent and device-specific arguments.  Each argument occupies
+   one or two elements: the first specifies for which device it is intended,
+   the type and optionally also the value.  If the value is not present in
+   the first element, the second element contains the actual value.  The last
+   element of the array is a single NULL.  Device-independent arguments
+   include, for example, NUM_TEAMS and THREAD_LIMIT.
+ NUM_TEAMS is positive if GOMP_teams will be called in the body with that value, or 1 if teams construct is not present, or 0, if teams construct does not have num_teams clause and so the choice is @@ -1443,14 +1506,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, void GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, int num_teams, - int thread_limit) + unsigned int flags, void **depend, void **args) { struct gomp_device_descr *devicep = resolve_device (device); - (void) num_teams; - (void) thread_limit; - if (flags & GOMP_TARGET_FLAG_NOWAIT) { struct gomp_thread *thr = gomp_thread (); @@ -1487,7 +1546,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, && !thr->task->final_task) { gomp_create_target_task (devicep, fn, mapnum, hostaddrs, - sizes, kinds, flags, depend, + sizes, kinds, flags, depend, args, GOMP_TARGET_TASK_BEFORE_MAP); return; } @@ -1507,17 +1566,30 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void *fn_addr; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) + || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds); return; } - struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_TARGET); - devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); - gomp_unmap_vars (tgt_vars, true); + struct target_mem_desc *tgt_vars; + void *fpc = NULL; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds); + tgt_vars = NULL; + } + else + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, + true, GOMP_MAP_VARS_TARGET); + devicep->run_func (devicep->target_id, fn_addr, + tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, + args); + if (tgt_vars) + gomp_unmap_vars (tgt_vars, true); + else + free (fpc); } /* Host fallback for GOMP_target_data{,_ext} routines. 
*/ @@ -1547,7 +1619,8 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) return gomp_target_data_fallback (); struct target_mem_desc *tgt @@ -1565,7 +1638,8 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return gomp_target_data_fallback (); struct target_mem_desc *tgt @@ -1595,7 +1669,8 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); @@ -1626,7 +1701,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, flags | GOMP_TARGET_FLAG_UPDATE, - depend, GOMP_TARGET_TASK_DATA)) + depend, NULL, GOMP_TARGET_TASK_DATA)) return; } else @@ -1646,7 +1721,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, } if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; struct gomp_thread *thr = gomp_thread (); @@ -1756,7 +1832,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, { if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, - flags, depend, + flags, depend, NULL, GOMP_TARGET_TASK_DATA)) return; } @@ -1777,7 +1853,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; struct gomp_thread *thr = gomp_thread (); @@ -1815,7 +1892,8 @@ gomp_target_task_fn (void *data) void *fn_addr; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))) + || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum, @@ -1826,22 +1904,36 @@ gomp_target_task_fn (void *data) if (ttask->state == GOMP_TARGET_TASK_FINISHED) { - gomp_unmap_vars (ttask->tgt, true); + if (ttask->tgt) + gomp_unmap_vars (ttask->tgt, true); return false; } - ttask->tgt - = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL, - ttask->sizes, ttask->kinds, true, - GOMP_MAP_VARS_TARGET); + void *actual_arguments; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + ttask->tgt = NULL; + ttask->firstprivate_copies + = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs, + ttask->sizes, 
ttask->kinds); + actual_arguments = ttask->hostaddrs; + } + else + { + ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, + NULL, ttask->sizes, ttask->kinds, true, + GOMP_MAP_VARS_TARGET); + actual_arguments = (void *) ttask->tgt->tgt_start; + } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; - devicep->async_run_func (devicep->target_id, fn_addr, - (void *) ttask->tgt->tgt_start, (void *) ttask); + devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, + ttask->args, (void *) ttask); return true; } else if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return false; size_t i; @@ -1891,7 +1983,8 @@ omp_target_alloc (size_t size, int device_num) if (devicep == NULL) return NULL; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return malloc (size); gomp_mutex_lock (&devicep->lock); @@ -1919,7 +2012,8 @@ omp_target_free (void *device_ptr, int device_num) if (devicep == NULL) return; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { free (device_ptr); return; @@ -1946,7 +2040,8 @@ omp_target_is_present (void *ptr, int device_num) if (devicep == NULL) return 0; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return 1; gomp_mutex_lock (&devicep->lock); @@ -1976,7 +2071,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, if (dst_devicep == NULL) return EINVAL; - if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) dst_devicep = NULL; } if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) @@ -1988,7 +2084,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, if (src_devicep == NULL) return EINVAL; - if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) src_devicep = NULL; } if (src_devicep == NULL && dst_devicep == NULL) @@ -2118,7 +2215,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, if (dst_devicep == NULL) return EINVAL; - if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) dst_devicep = NULL; } if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) @@ -2130,7 +2228,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, if (src_devicep == NULL) return EINVAL; - if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) src_devicep = NULL; } @@ -2166,7 +2265,8 @@ omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, if (devicep == NULL) return EINVAL; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || 
devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return EINVAL; gomp_mutex_lock (&devicep->lock); @@ -2309,6 +2409,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, { DLSYM (run); DLSYM (async_run); + DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) diff --git a/libgomp/task.c b/libgomp/task.c index b18b6e2..0f45c44 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -582,6 +582,7 @@ GOMP_PLUGIN_target_task_completion (void *data) return; } ttask->state = GOMP_TARGET_TASK_FINISHED; + free (ttask->firstprivate_copies); gomp_target_task_completion (team, task); gomp_mutex_unlock (&team->task_lock); } @@ -594,7 +595,7 @@ bool gomp_create_target_task (struct gomp_device_descr *devicep, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, + unsigned int flags, void **depend, void **args, enum gomp_target_task_state state) { struct gomp_thread *thr = gomp_thread (); @@ -654,6 +655,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, ttask->devicep = devicep; ttask->fn = fn; ttask->mapnum = mapnum; + ttask->args = args; memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); diff --git a/libgomp/testsuite/Makefile.in b/libgomp/testsuite/Makefile.in index c25d21f..1fae9e8 100644 --- a/libgomp/testsuite/Makefile.in +++ b/libgomp/testsuite/Makefile.in @@ -111,6 +111,8 @@ FC = @FC@ FCFLAGS = @FCFLAGS@ FGREP = @FGREP@ GREP = @GREP@ +HSA_RUNTIME_INCLUDE = @HSA_RUNTIME_INCLUDE@ +HSA_RUNTIME_LIB = @HSA_RUNTIME_LIB@ INSTALL = @INSTALL@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ @@ -155,6 +157,10 @@ PACKAGE_URL = @PACKAGE_URL@ PACKAGE_VERSION = @PACKAGE_VERSION@ PATH_SEPARATOR = @PATH_SEPARATOR@ PERL = @PERL@ +PLUGIN_HSA = @PLUGIN_HSA@ +PLUGIN_HSA_CPPFLAGS = @PLUGIN_HSA_CPPFLAGS@ +PLUGIN_HSA_LDFLAGS = @PLUGIN_HSA_LDFLAGS@ +PLUGIN_HSA_LIBS = @PLUGIN_HSA_LIBS@ PLUGIN_NVPTX = @PLUGIN_NVPTX@ PLUGIN_NVPTX_CPPFLAGS = @PLUGIN_NVPTX_CPPFLAGS@ PLUGIN_NVPTX_LDFLAGS = @PLUGIN_NVPTX_LDFLAGS@ diff --git a/liboffloadmic/ChangeLog b/liboffloadmic/ChangeLog index c6fd49f..a4dc72e 100644 --- a/liboffloadmic/ChangeLog +++ b/liboffloadmic/ChangeLog @@ -1,3 +1,8 @@ +2016-01-19 Martin Jambor <mjambor@suse.cz> + * plugin/libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_async_run): New + unused parameter. + (GOMP_OFFLOAD_run): Likewise. + 2015-12-14 Ilya Verbin <ilya.verbin@intel.com> * plugin/libgomp-plugin-intelmic.cpp (unregister_main_image): Remove. 
diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
index 68f7b2c..58ef595 100644
--- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
+++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp
@@ -528,7 +528,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr,
 
 extern "C" void
 GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
-			void *async_data)
+			void **, void *async_data)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device,
	 tgt_fn, tgt_vars, async_data);
@@ -544,7 +544,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
 }
 
 extern "C" void
-GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars)
+GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **)
 {
   TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn,
	 tgt_vars);