diff options
Diffstat (limited to 'gcc')
55 files changed, 145 insertions, 16790 deletions
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 7251c00..b77aa3b 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -933,8 +933,7 @@ FIXED_VALUE_H = fixed-value.h RTL_H = $(RTL_BASE_H) $(FLAGS_H) genrtl.h READ_MD_H = $(OBSTACK_H) $(HASHTAB_H) read-md.h BUILTINS_DEF = builtins.def sync-builtins.def omp-builtins.def \ - gtm-builtins.def sanitizer.def \ - hsa-builtins.def + gtm-builtins.def sanitizer.def INTERNAL_FN_DEF = internal-fn.def INTERNAL_FN_H = internal-fn.h $(INTERNAL_FN_DEF) TREE_CORE_H = tree-core.h $(CORETYPES_H) all-tree.def tree.def \ @@ -1395,11 +1394,6 @@ OBJS = \ haifa-sched.o \ hash-map-tests.o \ hash-set-tests.o \ - hsa-common.o \ - hsa-gen.o \ - hsa-regalloc.o \ - hsa-brig.o \ - hsa-dump.o \ hw-doloop.o \ hwint.o \ ifcvt.o \ @@ -1427,7 +1421,6 @@ OBJS = \ ipa-icf.o \ ipa-icf-gimple.o \ ipa-reference.o \ - ipa-hsa.o \ ipa-ref.o \ ipa-utils.o \ ipa.o \ @@ -1471,7 +1464,6 @@ OBJS = \ omp-offload.o \ omp-expand.o \ omp-general.o \ - omp-grid.o \ omp-low.o \ omp-simd-clone.o \ opt-problem.o \ @@ -2619,7 +2611,6 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \ $(srcdir)/omp-offload.h \ $(srcdir)/omp-offload.c \ - $(srcdir)/omp-expand.c \ $(srcdir)/omp-general.c \ $(srcdir)/omp-low.c \ $(srcdir)/targhooks.c $(out_file) $(srcdir)/passes.c $(srcdir)/cgraphunit.c \ @@ -2643,7 +2634,6 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ $(srcdir)/sancov.c \ $(srcdir)/ipa-devirt.c \ $(srcdir)/internal-fn.h \ - $(srcdir)/hsa-common.c \ $(srcdir)/calls.c \ $(srcdir)/omp-general.h \ @all_gtfiles@ diff --git a/gcc/brig/brigfrontend/brig-util.cc b/gcc/brig/brigfrontend/brig-util.cc index ad803a2..a10f5e6 100644 --- a/gcc/brig/brigfrontend/brig-util.cc +++ b/gcc/brig/brigfrontend/brig-util.cc @@ -563,3 +563,12 @@ gccbrig_print_reg_use_info (FILE *dump, const regs_use_index &info) } } } + +/* Return true if TYPE is a packed HSA type. 
*/ + +bool +hsa_type_packed_p (BrigType16_t type) +{ + return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE; +} + diff --git a/gcc/brig/brigfrontend/brig-util.h b/gcc/brig/brigfrontend/brig-util.h index 0d7123a..b9f5232 100644 --- a/gcc/brig/brigfrontend/brig-util.h +++ b/gcc/brig/brigfrontend/brig-util.h @@ -115,4 +115,6 @@ gccbrig_type_vector_subparts (const_tree type) return TYPE_VECTOR_SUBPARTS (type).to_constant (); } +bool hsa_type_packed_p (BrigType16_t type); + #endif diff --git a/gcc/hsa-brig-format.h b/gcc/brig/brigfrontend/hsa-brig-format.h index e16f469..e16f469 100644 --- a/gcc/hsa-brig-format.h +++ b/gcc/brig/brigfrontend/hsa-brig-format.h diff --git a/gcc/builtins.def b/gcc/builtins.def index ee67ac1..102322b 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -222,19 +222,6 @@ along with GCC; see the file COPYING3. If not see || flag_tree_parallelize_loops > 1 \ || flag_offload_abi != OFFLOAD_ABI_UNSET)) -#undef DEF_HSA_BUILTIN -#ifdef ENABLE_HSA -#define DEF_HSA_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ - DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - false, false, true, ATTRS, false, \ - (!flag_disable_hsa)) -#else -#define DEF_HSA_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ - DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ - false, false, true, ATTRS, false, \ - (false)) -#endif - /* Builtin used by the implementation of GNU TM. These functions are mapped to the actual implementation of the STM library. */ #undef DEF_TM_BUILTIN @@ -1063,9 +1050,6 @@ DEF_GCC_BUILTIN (BUILT_IN_LINE, "LINE", BT_FN_INT, ATTR_NOTHROW_LEAF_LIST) /* Offloading and Multi Processing builtins. */ #include "omp-builtins.def" -/* Heterogeneous Systems Architecture. */ -#include "hsa-builtins.def" - /* GTM builtins. 
*/ #include "gtm-builtins.def" diff --git a/gcc/common.opt b/gcc/common.opt index 47f4261..c16d1fa 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -228,10 +228,6 @@ unsigned int flag_sanitize_coverage Variable bool dump_base_name_prefixed = false -; Flag whether HSA generation has been explicitely disabled -Variable -bool flag_disable_hsa = false - ### Driver @@ -619,8 +615,8 @@ Common Var(warn_free_nonheap_object) Init(1) Warning Warn when attempting to free a non-heap object. Whsa -Common Var(warn_hsa) Init(1) Warning -Warn when a function cannot be expanded to HSAIL. +Common Ignore Warning +Does nothing. Preserved for backward compatibility. Wimplicit-fallthrough Common Alias(Wimplicit-fallthrough=,3,0) Warning diff --git a/gcc/config.in b/gcc/config.in index 364eba4..478e74f 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -181,12 +181,6 @@ #endif -/* Define this to enable support for generating HSAIL. */ -#ifndef USED_FOR_TARGET -#undef ENABLE_HSA -#endif - - /* Define if gcc should always pass --build-id to linker. */ #ifndef USED_FOR_TARGET #undef ENABLE_LD_BUILDID diff --git a/gcc/configure b/gcc/configure index 21ce66d..0f7a8db 100755 --- a/gcc/configure +++ b/gcc/configure @@ -7948,30 +7948,26 @@ fi for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do tgt=`echo $tgt | sed 's/=.*//'` - if echo "$tgt" | grep "^hsa" > /dev/null ; then - enable_hsa=1 - else - enable_offloading=1 - case "$tgt" in - *-intelmic-* | *-intelmicemul-*) - omp_device_property=omp-device-properties-i386 - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device" - ;; - amdgcn*) - omp_device_property=omp-device-properties-gcn - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device" - ;; - nvptx*) - omp_device_property=omp-device-properties-nvptx - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device" - ;; - *) - as_fn_error $? 
"unknown offload target specified" "$LINENO" 5 - ;; - esac - omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}" - omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}" - fi + enable_offloading=1 + case "$tgt" in + *-intelmic-* | *-intelmicemul-*) + omp_device_property=omp-device-properties-i386 + omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device" + ;; + amdgcn*) + omp_device_property=omp-device-properties-gcn + omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device" + ;; + nvptx*) + omp_device_property=omp-device-properties-nvptx + omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device" + ;; + *) + as_fn_error $? "unknown offload target specified" "$LINENO" 5 + ;; + esac + omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}" + omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}" if test x"$offload_targets" = x; then offload_targets=$tgt @@ -7997,12 +7993,6 @@ $as_echo "#define ENABLE_OFFLOADING 0" >>confdefs.h fi -if test x"$enable_hsa" = x1 ; then - -$as_echo "#define ENABLE_HSA 1" >>confdefs.h - -fi - # Check whether --with-multilib-list was given. 
if test "${with_multilib_list+set}" = set; then : @@ -19023,7 +19013,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19026 "configure" +#line 19016 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -19129,7 +19119,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 19132 "configure" +#line 19122 "configure" #include "confdefs.h" #if HAVE_DLFCN_H diff --git a/gcc/configure.ac b/gcc/configure.ac index a7c683b..0f11238 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -1057,30 +1057,26 @@ AC_SUBST(accel_dir_suffix) for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do tgt=`echo $tgt | sed 's/=.*//'` - if echo "$tgt" | grep "^hsa" > /dev/null ; then - enable_hsa=1 - else - enable_offloading=1 - case "$tgt" in - *-intelmic-* | *-intelmicemul-*) - omp_device_property=omp-device-properties-i386 - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device" - ;; - amdgcn*) - omp_device_property=omp-device-properties-gcn - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device" - ;; - nvptx*) - omp_device_property=omp-device-properties-nvptx - omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device" - ;; - *) - AC_MSG_ERROR([unknown offload target specified]) - ;; - esac - omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}" - omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}" - fi + enable_offloading=1 + case "$tgt" in + *-intelmic-* | *-intelmicemul-*) + omp_device_property=omp-device-properties-i386 + omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device" + ;; + amdgcn*) + omp_device_property=omp-device-properties-gcn + 
omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device" + ;; + nvptx*) + omp_device_property=omp-device-properties-nvptx + omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device" + ;; + *) + AC_MSG_ERROR([unknown offload target specified]) + ;; + esac + omp_device_properties="${omp_device_properties} ${tgt}=${omp_device_property}" + omp_device_property_deps="${omp_device_property_deps} ${omp_device_property}" if test x"$offload_targets" = x; then offload_targets=$tgt @@ -1101,11 +1097,6 @@ else [Define this to enable support for offloading.]) fi -if test x"$enable_hsa" = x1 ; then - AC_DEFINE(ENABLE_HSA, 1, - [Define this to enable support for generating HSAIL.]) -fi - AC_ARG_WITH(multilib-list, [AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and x86-64 only)])], :, diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index c367f4f..e1ca876 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -2194,22 +2194,18 @@ specifying paths @var{path1}, @dots{}, @var{pathN}. @smallexample % @var{srcdir}/configure \ - --enable-offload-targets=x86_64-intelmicemul-linux-gnu=/path/to/x86_64/compiler,nvptx-none,hsa + --enable-offload-targets=x86_64-intelmicemul-linux-gnu=/path/to/x86_64/compiler,nvptx-none @end smallexample -If @samp{hsa} is specified as one of the targets, the compiler will be -built with support for HSA GPU accelerators. Because the same -compiler will emit the accelerator code, no path should be specified. - @item --with-hsa-runtime=@var{pathname} @itemx --with-hsa-runtime-include=@var{pathname} @itemx --with-hsa-runtime-lib=@var{pathname} -If you configure GCC with HSA offloading but do not have the HSA -run-time library installed in a standard location then you can -explicitly specify the directory where they are installed. 
The -@option{--with-hsa-runtime=@/@var{hsainstalldir}} option is a -shorthand for +If you configure GCC with offloading which uses an HSA run-time such as +AMDGCN but do not have the HSA run-time library installed in a standard +location then you can explicitly specify the directory where they are +installed. The @option{--with-hsa-runtime=@/@var{hsainstalldir}} option +is a shorthand for @option{--with-hsa-runtime-lib=@/@var{hsainstalldir}/lib} and @option{--with-hsa-runtime-include=@/@var{hsainstalldir}/include}. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index eb73f30..003b998 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -332,7 +332,7 @@ Objective-C and Objective-C++ Dialects}. -Wformat-security -Wformat-signedness -Wformat-truncation=@var{n} @gol -Wformat-y2k -Wframe-address @gol -Wframe-larger-than=@var{byte-size} -Wno-free-nonheap-object @gol --Wno-hsa -Wno-if-not-aligned -Wno-ignored-attributes @gol +-Wno-if-not-aligned -Wno-ignored-attributes @gol -Wignored-qualifiers -Wno-incompatible-pointer-types @gol -Wimplicit -Wimplicit-fallthrough -Wimplicit-fallthrough=@var{n} @gol -Wno-implicit-function-declaration -Wno-implicit-int @gol @@ -8591,12 +8591,6 @@ Suppress warnings when a positional initializer is used to initialize a structure that has been marked with the @code{designated_init} attribute. -@item -Wno-hsa -@opindex Whsa -@opindex Wno-hsa -Do not warn when HSAIL cannot be emitted for the compiled function or -OpenMP construct. These warnings are enabled by default. - @end table @node Static Analyzer Options @@ -13393,12 +13387,6 @@ Maximum depth of recursion when querying properties of SSA names in things like fold routines. One level of recursion corresponds to following a use-def chain. -@item hsa-gen-debug-stores -Enable emission of special debug stores within HSA kernels which are -then read and reported by libgomp plugin. 
Generation of these stores -is disabled by default, use @option{--param hsa-gen-debug-stores=1} to -enable it. - @item max-speculative-devirt-maydefs The maximum number of may-defs we analyze when looking for a must-def specifying the dynamic type of an object that invokes a virtual call diff --git a/gcc/doc/passes.texi b/gcc/doc/passes.texi index 7424690..a5ae414 100644 --- a/gcc/doc/passes.texi +++ b/gcc/doc/passes.texi @@ -360,13 +360,6 @@ target doesn't support constructors and destructors natively. The pass is located in @file{ipa.c} and is described by @code{pass_ipa_cdtor_merge}. -@item IPA HSA - -This pass is part of the GCC support for HSA (Heterogeneous System -Architecture) accelerators. It is responsible for creation of HSA -clones and emitting HSAIL instructions for them. It is located in -@file{ipa-hsa.c} and is described by @code{pass_ipa_hsa}. - @item IPA function summary This pass provides function analysis for inter-procedural passes. diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index da8c35f..e3288d7 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -1238,17 +1238,6 @@ gfc_init_builtin_functions (void) #undef DEF_GOMP_BUILTIN } -#ifdef ENABLE_HSA - if (!flag_disable_hsa) - { -#undef DEF_HSA_BUILTIN -#define DEF_HSA_BUILTIN(code, name, type, attr) \ - gfc_define_builtin ("__builtin_" name, builtin_types[type], \ - code, name, attr); -#include "../hsa-builtins.def" - } -#endif - gfc_define_builtin ("__builtin_trap", builtin_types[BT_FN_VOID], BUILT_IN_TRAP, NULL, ATTR_NOTHROW_LEAF_LIST); TREE_THIS_VOLATILE (builtin_decl_explicit (BUILT_IN_TRAP)) = 1; diff --git a/gcc/gimple-low.c b/gcc/gimple-low.c index dcfa4be..e744d2a 100644 --- a/gcc/gimple-low.c +++ b/gcc/gimple-low.c @@ -393,7 +393,6 @@ lower_stmt (gimple_stmt_iterator *gsi, struct lower_data *data) case GIMPLE_OMP_TASK: case GIMPLE_OMP_TARGET: case GIMPLE_OMP_TEAMS: - case GIMPLE_OMP_GRID_BODY: data->cannot_fallthru = false; lower_omp_directive (gsi, data); 
data->cannot_fallthru = false; diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c index e05b770..a01bf90 100644 --- a/gcc/gimple-pretty-print.c +++ b/gcc/gimple-pretty-print.c @@ -1498,9 +1498,6 @@ dump_gimple_omp_for (pretty_printer *buffer, const gomp_for *gs, int spc, case GF_OMP_FOR_KIND_SIMD: pp_string (buffer, "#pragma omp simd"); break; - case GF_OMP_FOR_KIND_GRID_LOOP: - pp_string (buffer, "#pragma omp for grid_loop"); - break; default: gcc_unreachable (); } @@ -1836,9 +1833,6 @@ dump_gimple_omp_block (pretty_printer *buffer, const gimple *gs, int spc, case GIMPLE_OMP_SECTION: pp_string (buffer, "#pragma omp section"); break; - case GIMPLE_OMP_GRID_BODY: - pp_string (buffer, "#pragma omp gridified body"); - break; default: gcc_unreachable (); } @@ -2703,7 +2697,6 @@ pp_gimple_stmt_1 (pretty_printer *buffer, const gimple *gs, int spc, case GIMPLE_OMP_MASTER: case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_GRID_BODY: dump_gimple_omp_block (buffer, gs, spc, flags); break; diff --git a/gcc/gimple-walk.c b/gcc/gimple-walk.c index 9a761f3..a1ae3aa 100644 --- a/gcc/gimple-walk.c +++ b/gcc/gimple-walk.c @@ -668,7 +668,6 @@ walk_gimple_stmt (gimple_stmt_iterator *gsi, walk_stmt_fn callback_stmt, case GIMPLE_OMP_SINGLE: case GIMPLE_OMP_TARGET: case GIMPLE_OMP_TEAMS: - case GIMPLE_OMP_GRID_BODY: ret = walk_gimple_seq_mod (gimple_omp_body_ptr (stmt), callback_stmt, callback_op, wi); if (ret) diff --git a/gcc/gimple.c b/gcc/gimple.c index 10c562f..41f7cf3 100644 --- a/gcc/gimple.c +++ b/gcc/gimple.c @@ -1035,20 +1035,6 @@ gimple_build_omp_master (gimple_seq body) return p; } -/* Build a GIMPLE_OMP_GRID_BODY statement. - - BODY is the sequence of statements to be executed by the kernel. */ - -gimple * -gimple_build_omp_grid_body (gimple_seq body) -{ - gimple *p = gimple_alloc (GIMPLE_OMP_GRID_BODY, 0); - if (body) - gimple_omp_set_body (p, body); - - return p; -} - /* Build a GIMPLE_OMP_TASKGROUP statement. 
BODY is the sequence of statements to be executed by the taskgroup @@ -2018,7 +2004,6 @@ gimple_copy (gimple *stmt) case GIMPLE_OMP_SECTION: case GIMPLE_OMP_MASTER: - case GIMPLE_OMP_GRID_BODY: copy_omp_body: new_seq = gimple_seq_copy (gimple_omp_body (stmt)); gimple_omp_set_body (copy, new_seq); diff --git a/gcc/gimple.def b/gcc/gimple.def index 075f7f4..4aa6f3d 100644 --- a/gcc/gimple.def +++ b/gcc/gimple.def @@ -384,10 +384,6 @@ DEFGSCODE(GIMPLE_OMP_TEAMS, "gimple_omp_teams", GSS_OMP_PARALLEL_LAYOUT) CLAUSES is an OMP_CLAUSE chain holding the associated clauses. */ DEFGSCODE(GIMPLE_OMP_ORDERED, "gimple_omp_ordered", GSS_OMP_SINGLE_LAYOUT) -/* GIMPLE_OMP_GRID_BODY <BODY> represents a parallel loop lowered for execution - on a GPU. It is an artificial statement created by omp lowering. */ -DEFGSCODE(GIMPLE_OMP_GRID_BODY, "gimple_omp_gpukernel", GSS_OMP) - /* GIMPLE_PREDICT <PREDICT, OUTCOME> specifies a hint for branch prediction. PREDICT is one of the predictors from predict.def. diff --git a/gcc/gimple.h b/gcc/gimple.h index d64c47a..6f7c7ff 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -150,7 +150,6 @@ enum gf_mask { GF_CALL_BY_DESCRIPTOR = 1 << 10, GF_CALL_NOCF_CHECK = 1 << 11, GF_OMP_PARALLEL_COMBINED = 1 << 0, - GF_OMP_PARALLEL_GRID_PHONY = 1 << 1, GF_OMP_TASK_TASKLOOP = 1 << 0, GF_OMP_TASK_TASKWAIT = 1 << 1, GF_OMP_FOR_KIND_MASK = (1 << 3) - 1, @@ -158,17 +157,9 @@ enum gf_mask { GF_OMP_FOR_KIND_DISTRIBUTE = 1, GF_OMP_FOR_KIND_TASKLOOP = 2, GF_OMP_FOR_KIND_OACC_LOOP = 4, - GF_OMP_FOR_KIND_GRID_LOOP = 5, - GF_OMP_FOR_KIND_SIMD = 6, + GF_OMP_FOR_KIND_SIMD = 5, GF_OMP_FOR_COMBINED = 1 << 3, GF_OMP_FOR_COMBINED_INTO = 1 << 4, - /* The following flag must not be used on GF_OMP_FOR_KIND_GRID_LOOP loop - statements. */ - GF_OMP_FOR_GRID_PHONY = 1 << 5, - /* The following two flags should only be set on GF_OMP_FOR_KIND_GRID_LOOP - loop statements. 
*/ - GF_OMP_FOR_GRID_INTRA_GROUP = 1 << 5, - GF_OMP_FOR_GRID_GROUP_ITER = 1 << 6, GF_OMP_TARGET_KIND_MASK = (1 << 4) - 1, GF_OMP_TARGET_KIND_REGION = 0, GF_OMP_TARGET_KIND_DATA = 1, @@ -183,8 +174,7 @@ enum gf_mask { GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA = 10, GF_OMP_TARGET_KIND_OACC_DECLARE = 11, GF_OMP_TARGET_KIND_OACC_HOST_DATA = 12, - GF_OMP_TEAMS_GRID_PHONY = 1 << 0, - GF_OMP_TEAMS_HOST = 1 << 1, + GF_OMP_TEAMS_HOST = 1 << 0, /* True on an GIMPLE_OMP_RETURN statement if the return does not require a thread synchronization via some sort of barrier. The exact barrier @@ -1559,7 +1549,6 @@ gomp_task *gimple_build_omp_task (gimple_seq, tree, tree, tree, tree, tree, tree); gimple *gimple_build_omp_section (gimple_seq); gimple *gimple_build_omp_master (gimple_seq); -gimple *gimple_build_omp_grid_body (gimple_seq); gimple *gimple_build_omp_taskgroup (gimple_seq, tree); gomp_continue *gimple_build_omp_continue (tree, tree); gomp_ordered *gimple_build_omp_ordered (gimple_seq, tree); @@ -1830,7 +1819,6 @@ gimple_has_substatements (gimple *g) case GIMPLE_OMP_CRITICAL: case GIMPLE_WITH_CLEANUP_EXPR: case GIMPLE_TRANSACTION: - case GIMPLE_OMP_GRID_BODY: return true; default: @@ -5440,76 +5428,6 @@ gimple_omp_for_set_pre_body (gimple *gs, gimple_seq pre_body) omp_for_stmt->pre_body = pre_body; } -/* Return the kernel_phony of OMP_FOR statement. */ - -static inline bool -gimple_omp_for_grid_phony (const gomp_for *omp_for) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - != GF_OMP_FOR_KIND_GRID_LOOP); - return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_PHONY) != 0; -} - -/* Set kernel_phony flag of OMP_FOR to VALUE. 
*/ - -static inline void -gimple_omp_for_set_grid_phony (gomp_for *omp_for, bool value) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - != GF_OMP_FOR_KIND_GRID_LOOP); - if (value) - omp_for->subcode |= GF_OMP_FOR_GRID_PHONY; - else - omp_for->subcode &= ~GF_OMP_FOR_GRID_PHONY; -} - -/* Return the kernel_intra_group of a GRID_LOOP OMP_FOR statement. */ - -static inline bool -gimple_omp_for_grid_intra_group (const gomp_for *omp_for) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - == GF_OMP_FOR_KIND_GRID_LOOP); - return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_INTRA_GROUP) != 0; -} - -/* Set kernel_intra_group flag of OMP_FOR to VALUE. */ - -static inline void -gimple_omp_for_set_grid_intra_group (gomp_for *omp_for, bool value) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - == GF_OMP_FOR_KIND_GRID_LOOP); - if (value) - omp_for->subcode |= GF_OMP_FOR_GRID_INTRA_GROUP; - else - omp_for->subcode &= ~GF_OMP_FOR_GRID_INTRA_GROUP; -} - -/* Return true if iterations of a grid OMP_FOR statement correspond to HSA - groups. */ - -static inline bool -gimple_omp_for_grid_group_iter (const gomp_for *omp_for) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - == GF_OMP_FOR_KIND_GRID_LOOP); - return (gimple_omp_subcode (omp_for) & GF_OMP_FOR_GRID_GROUP_ITER) != 0; -} - -/* Set group_iter flag of OMP_FOR to VALUE. */ - -static inline void -gimple_omp_for_set_grid_group_iter (gomp_for *omp_for, bool value) -{ - gcc_checking_assert (gimple_omp_for_kind (omp_for) - == GF_OMP_FOR_KIND_GRID_LOOP); - if (value) - omp_for->subcode |= GF_OMP_FOR_GRID_GROUP_ITER; - else - omp_for->subcode &= ~GF_OMP_FOR_GRID_GROUP_ITER; -} - /* Return the clauses associated with OMP_PARALLEL GS. */ static inline tree @@ -5595,25 +5513,6 @@ gimple_omp_parallel_set_data_arg (gomp_parallel *omp_parallel_stmt, omp_parallel_stmt->data_arg = data_arg; } -/* Return the kernel_phony flag of OMP_PARALLEL_STMT. 
*/ - -static inline bool -gimple_omp_parallel_grid_phony (const gomp_parallel *stmt) -{ - return (gimple_omp_subcode (stmt) & GF_OMP_PARALLEL_GRID_PHONY) != 0; -} - -/* Set kernel_phony flag of OMP_PARALLEL_STMT to VALUE. */ - -static inline void -gimple_omp_parallel_set_grid_phony (gomp_parallel *stmt, bool value) -{ - if (value) - stmt->subcode |= GF_OMP_PARALLEL_GRID_PHONY; - else - stmt->subcode &= ~GF_OMP_PARALLEL_GRID_PHONY; -} - /* Return the clauses associated with OMP_TASK GS. */ static inline tree @@ -6165,25 +6064,6 @@ gimple_omp_teams_set_data_arg (gomp_teams *omp_teams_stmt, tree data_arg) omp_teams_stmt->data_arg = data_arg; } -/* Return the kernel_phony flag of an OMP_TEAMS_STMT. */ - -static inline bool -gimple_omp_teams_grid_phony (const gomp_teams *omp_teams_stmt) -{ - return (gimple_omp_subcode (omp_teams_stmt) & GF_OMP_TEAMS_GRID_PHONY) != 0; -} - -/* Set kernel_phony flag of an OMP_TEAMS_STMT to VALUE. */ - -static inline void -gimple_omp_teams_set_grid_phony (gomp_teams *omp_teams_stmt, bool value) -{ - if (value) - omp_teams_stmt->subcode |= GF_OMP_TEAMS_GRID_PHONY; - else - omp_teams_stmt->subcode &= ~GF_OMP_TEAMS_GRID_PHONY; -} - /* Return the host flag of an OMP_TEAMS_STMT. */ static inline bool @@ -6547,8 +6427,7 @@ gimple_return_set_retval (greturn *gs, tree retval) case GIMPLE_OMP_RETURN: \ case GIMPLE_OMP_ATOMIC_LOAD: \ case GIMPLE_OMP_ATOMIC_STORE: \ - case GIMPLE_OMP_CONTINUE: \ - case GIMPLE_OMP_GRID_BODY + case GIMPLE_OMP_CONTINUE static inline bool is_gimple_omp (const gimple *stmt) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c deleted file mode 100644 index 545385f..0000000 --- a/gcc/hsa-brig.c +++ /dev/null @@ -1,2612 +0,0 @@ -/* Producing binary form of HSA BRIG from our internal representation. - Copyright (C) 2013-2020 Free Software Foundation, Inc. - Contributed by Martin Jambor <mjambor@suse.cz> and - Martin Liska <mliska@suse.cz>. - -This file is part of GCC. 
- -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "target.h" -#include "memmodel.h" -#include "tm_p.h" -#include "is-a.h" -#include "vec.h" -#include "hash-table.h" -#include "hash-map.h" -#include "tree.h" -#include "tree-iterator.h" -#include "stor-layout.h" -#include "output.h" -#include "basic-block.h" -#include "function.h" -#include "cfg.h" -#include "fold-const.h" -#include "stringpool.h" -#include "gimple-pretty-print.h" -#include "diagnostic-core.h" -#include "cgraph.h" -#include "dumpfile.h" -#include "print-tree.h" -#include "alloc-pool.h" -#include "symbol-summary.h" -#include "hsa-common.h" -#include "gomp-constants.h" - -/* Convert VAL to little endian form, if necessary. */ - -static uint16_t -lendian16 (uint16_t val) -{ -#if GCC_VERSION >= 4008 -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return val; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return __builtin_bswap16 (val); -#else /* __ORDER_PDP_ENDIAN__ */ - return val; -#endif -#else -// provide a safe slower default, with shifts and masking -#ifndef WORDS_BIGENDIAN - return val; -#else - return (val >> 8) | (val << 8); -#endif -#endif -} - -/* Convert VAL to little endian form, if necessary. 
*/ - -static uint32_t -lendian32 (uint32_t val) -{ -#if GCC_VERSION >= 4006 -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return val; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return __builtin_bswap32 (val); -#else /* __ORDER_PDP_ENDIAN__ */ - return (val >> 16) | (val << 16); -#endif -#else -// provide a safe slower default, with shifts and masking -#ifndef WORDS_BIGENDIAN - return val; -#else - val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); - return (val >> 16) | (val << 16); -#endif -#endif -} - -/* Convert VAL to little endian form, if necessary. */ - -static uint64_t -lendian64 (uint64_t val) -{ -#if GCC_VERSION >= 4006 -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return val; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return __builtin_bswap64 (val); -#else /* __ORDER_PDP_ENDIAN__ */ - return (((val & 0xffffll) << 48) - | ((val & 0xffff0000ll) << 16) - | ((val & 0xffff00000000ll) >> 16) - | ((val & 0xffff000000000000ll) >> 48)); -#endif -#else -// provide a safe slower default, with shifts and masking -#ifndef WORDS_BIGENDIAN - return val; -#else - val = (((val & 0xff00ff00ff00ff00ll) >> 8) - | ((val & 0x00ff00ff00ff00ffll) << 8)); - val = ((( val & 0xffff0000ffff0000ll) >> 16) - | (( val & 0x0000ffff0000ffffll) << 16)); - return (val >> 32) | (val << 32); -#endif -#endif -} - -#define BRIG_ELF_SECTION_NAME ".brig" -#define BRIG_LABEL_STRING "hsa_brig" -#define BRIG_SECTION_DATA_NAME "hsa_data" -#define BRIG_SECTION_CODE_NAME "hsa_code" -#define BRIG_SECTION_OPERAND_NAME "hsa_operand" - -#define BRIG_CHUNK_MAX_SIZE (64 * 1024) - -/* Required HSA section alignment. */ - -#define HSA_SECTION_ALIGNMENT 16 - -/* Chunks of BRIG binary data. */ - -struct hsa_brig_data_chunk -{ - /* Size of the data already stored into a chunk. */ - unsigned size; - - /* Pointer to the data. */ - char *data; -}; - -/* Structure representing a BRIG section, holding and writing its data. 
*/ - -struct hsa_brig_section -{ - /* Section name that will be output to the BRIG. */ - const char *section_name; - /* Size in bytes of all data stored in the section. */ - unsigned total_size; - /* The size of the header of the section including padding. */ - unsigned header_byte_count; - /* The size of the header of the section without any padding. */ - unsigned header_byte_delta; - - void init (const char *name); - void release (); - void output (); - unsigned add (const void *data, unsigned len, void **output = NULL); - void round_size_up (int factor); - void *get_ptr_by_offset (unsigned int offset); - -private: - void allocate_new_chunk (); - - /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ - vec <struct hsa_brig_data_chunk> chunks; - - /* More convenient access to the last chunk from the vector above. */ - struct hsa_brig_data_chunk *cur_chunk; -}; - -static struct hsa_brig_section brig_data, brig_code, brig_operand; -static uint32_t brig_insn_count; -static bool brig_initialized = false; - -/* Mapping between emitted HSA functions and their offset in code segment. */ -static hash_map<tree, BrigCodeOffset32_t> *function_offsets; - -/* Hash map of emitted function declarations. */ -static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; - -/* Hash table of emitted internal function declaration offsets. */ -hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; - -/* List of sbr instructions. */ -static vec <hsa_insn_sbr *> *switch_instructions; - -class function_linkage_pair -{ -public: - function_linkage_pair (tree decl, unsigned int off) - : function_decl (decl), offset (off) {} - - /* Declaration of called function. */ - tree function_decl; - - /* Offset in operand section. */ - unsigned int offset; -}; - -/* Vector of function calls where we need to resolve function offsets. */ -static auto_vec <function_linkage_pair> function_call_linkage; - -/* Add a new chunk, allocate data for it and initialize it. 
*/ - -void -hsa_brig_section::allocate_new_chunk () -{ - struct hsa_brig_data_chunk new_chunk; - - new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); - new_chunk.size = 0; - cur_chunk = chunks.safe_push (new_chunk); -} - -/* Initialize the brig section. */ - -void -hsa_brig_section::init (const char *name) -{ - section_name = name; - /* While the following computation is basically wrong, because the intent - certainly wasn't to have the first character of name and padding, which - are a part of sizeof (BrigSectionHeader), included in the first addend, - this is what the disassembler expects. */ - total_size = sizeof (BrigSectionHeader) + strlen (section_name); - chunks.create (1); - allocate_new_chunk (); - header_byte_delta = total_size; - round_size_up (4); - header_byte_count = total_size; -} - -/* Free all data in the section. */ - -void -hsa_brig_section::release () -{ - for (unsigned i = 0; i < chunks.length (); i++) - free (chunks[i].data); - chunks.release (); - cur_chunk = NULL; -} - -/* Write the section to the output file to a section with the name given at - initialization. Switches the output section and does not restore it. */ - -void -hsa_brig_section::output () -{ - struct BrigSectionHeader section_header; - char padding[8]; - - section_header.byteCount = lendian64 (total_size); - section_header.headerByteCount = lendian32 (header_byte_count); - section_header.nameLength = lendian32 (strlen (section_name)); - assemble_string ((const char *) §ion_header, 16); - assemble_string (section_name, (section_header.nameLength)); - memset (&padding, 0, sizeof (padding)); - /* This is also a consequence of the wrong header size computation described - in a comment in hsa_brig_section::init. */ - assemble_string (padding, 8); - for (unsigned i = 0; i < chunks.length (); i++) - assemble_string (chunks[i].data, chunks[i].size); -} - -/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at - which it was stored. 
If OUTPUT is not NULL, store into it the pointer to - the place where DATA was actually stored. */ - -unsigned -hsa_brig_section::add (const void *data, unsigned len, void **output) -{ - unsigned offset = total_size; - - gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); - if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) - allocate_new_chunk (); - - char *dst = cur_chunk->data + cur_chunk->size; - memcpy (dst, data, len); - if (output) - *output = dst; - cur_chunk->size += len; - total_size += len; - - return offset; -} - -/* Add padding to section so that its size is divisible by FACTOR. */ - -void -hsa_brig_section::round_size_up (int factor) -{ - unsigned padding, res = total_size % factor; - - if (res == 0) - return; - - padding = factor - res; - total_size += padding; - if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) - { - padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; - cur_chunk->size = BRIG_CHUNK_MAX_SIZE; - allocate_new_chunk (); - } - - cur_chunk->size += padding; -} - -/* Return pointer to data by global OFFSET in the section. */ - -void * -hsa_brig_section::get_ptr_by_offset (unsigned int offset) -{ - gcc_assert (offset < total_size); - offset -= header_byte_delta; - - unsigned i; - for (i = 0; offset >= chunks[i].size; i++) - offset -= chunks[i].size; - - return chunks[i].data + offset; -} - -/* BRIG string data hashing. */ - -struct brig_string_slot -{ - const char *s; - char prefix; - int len; - uint32_t offset; -}; - -/* Hash table helpers. */ - -struct brig_string_slot_hasher : pointer_hash <brig_string_slot> -{ - static inline hashval_t hash (const value_type); - static inline bool equal (const value_type, const compare_type); - static inline void remove (value_type); -}; - -/* Returns a hash code for DS. Adapted from libiberty's htab_hash_string - to support strings that may not end in '\0'. 
*/ - -inline hashval_t -brig_string_slot_hasher::hash (const value_type ds) -{ - hashval_t r = ds->len; - int i; - - for (i = 0; i < ds->len; i++) - r = r * 67 + (unsigned) ds->s[i] - 113; - r = r * 67 + (unsigned) ds->prefix - 113; - return r; -} - -/* Returns nonzero if DS1 and DS2 are equal. */ - -inline bool -brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) -{ - if (ds1->len == ds2->len) - return ds1->prefix == ds2->prefix - && memcmp (ds1->s, ds2->s, ds1->len) == 0; - - return 0; -} - -/* Deallocate memory for DS upon its removal. */ - -inline void -brig_string_slot_hasher::remove (value_type ds) -{ - free (const_cast<char *> (ds->s)); - free (ds); -} - -/* Hash for strings we output in order not to duplicate them needlessly. */ - -static hash_table<brig_string_slot_hasher> *brig_string_htab; - -/* Emit a null terminated string STR to the data section and return its - offset in it. If PREFIX is non-zero, output it just before STR too. - Sanitize the string if SANITIZE option is set to true. */ - -static unsigned -brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) -{ - unsigned slen = strlen (str); - unsigned offset, len = slen + (prefix ? 1 : 0); - uint32_t hdr_len = lendian32 (len); - brig_string_slot s_slot; - brig_string_slot **slot; - char *str2; - - str2 = xstrdup (str); - - if (sanitize) - hsa_sanitize_name (str2); - s_slot.s = str2; - s_slot.len = slen; - s_slot.prefix = prefix; - s_slot.offset = 0; - - slot = brig_string_htab->find_slot (&s_slot, INSERT); - if (*slot == NULL) - { - brig_string_slot *new_slot = XCNEW (brig_string_slot); - - /* In theory we should fill in BrigData but that would mean copying - the string to a buffer for no reason, so we just emulate it. 
*/ - offset = brig_data.add (&hdr_len, sizeof (hdr_len)); - if (prefix) - brig_data.add (&prefix, 1); - - brig_data.add (str2, slen); - brig_data.round_size_up (4); - - /* TODO: could use the string we just copied into - brig_string->cur_chunk */ - new_slot->s = str2; - new_slot->len = slen; - new_slot->prefix = prefix; - new_slot->offset = offset; - *slot = new_slot; - } - else - { - offset = (*slot)->offset; - free (str2); - } - - return offset; -} - -/* Linked list of queued operands. */ - -static struct operand_queue -{ - /* First from the chain of queued operands. */ - hsa_op_base *first_op, *last_op; - - /* The offset at which the next operand will be enqueued. */ - unsigned projected_size; - -} op_queue; - -/* Unless already initialized, initialize infrastructure to produce BRIG. */ - -static void -brig_init (void) -{ - brig_insn_count = 0; - - if (brig_initialized) - return; - - brig_string_htab = new hash_table<brig_string_slot_hasher> (37); - brig_data.init (BRIG_SECTION_DATA_NAME); - brig_code.init (BRIG_SECTION_CODE_NAME); - brig_operand.init (BRIG_SECTION_OPERAND_NAME); - brig_initialized = true; - - struct BrigDirectiveModule moddir; - memset (&moddir, 0, sizeof (moddir)); - moddir.base.byteCount = lendian16 (sizeof (moddir)); - - char *modname; - if (main_input_filename && *main_input_filename != '\0') - { - const char *part = strrchr (main_input_filename, '/'); - if (!part) - part = main_input_filename; - else - part++; - modname = concat ("&__hsa_module_", part, NULL); - char *extension = strchr (modname, '.'); - if (extension) - *extension = '\0'; - - /* As in LTO mode, we have to emit a different module names. 
*/ - if (flag_ltrans) - { - part = strrchr (asm_file_name, '/'); - if (!part) - part = asm_file_name; - else - part++; - char *modname2; - modname2 = xasprintf ("%s_%s", modname, part); - free (modname); - modname = modname2; - } - - hsa_sanitize_name (modname); - moddir.name = brig_emit_string (modname); - free (modname); - } - else - moddir.name = brig_emit_string ("__hsa_module_unnamed", '&'); - moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE); - moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR); - moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR); - moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE; - if (hsa_machine_large_p ()) - moddir.machineModel = BRIG_MACHINE_LARGE; - else - moddir.machineModel = BRIG_MACHINE_SMALL; - moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT; - brig_code.add (&moddir, sizeof (moddir)); -} - -/* Free all BRIG data. */ - -static void -brig_release_data (void) -{ - delete brig_string_htab; - brig_data.release (); - brig_code.release (); - brig_operand.release (); - - brig_initialized = 0; -} - -/* Enqueue operation OP. Return the offset at which it will be stored. 
*/ - -static unsigned int -enqueue_op (hsa_op_base *op) -{ - unsigned ret; - - if (op->m_brig_op_offset) - return op->m_brig_op_offset; - - ret = op_queue.projected_size; - op->m_brig_op_offset = op_queue.projected_size; - - if (!op_queue.first_op) - op_queue.first_op = op; - else - op_queue.last_op->m_next = op; - op_queue.last_op = op; - - if (is_a <hsa_op_immed *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); - else if (is_a <hsa_op_reg *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandRegister); - else if (is_a <hsa_op_address *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandAddress); - else if (is_a <hsa_op_code_ref *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandCodeRef); - else if (is_a <hsa_op_code_list *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandCodeList); - else if (is_a <hsa_op_operand_list *> (op)) - op_queue.projected_size += sizeof (struct BrigOperandOperandList); - else - gcc_unreachable (); - return ret; -} - -static void emit_immediate_operand (hsa_op_immed *imm); - -/* Emit directive describing a symbol if it has not been emitted already. - Return the offset of the directive. */ - -static unsigned -emit_directive_variable (class hsa_symbol *symbol) -{ - struct BrigDirectiveVariable dirvar; - unsigned name_offset; - static unsigned res_name_offset; - - if (symbol->m_directive_offset) - return symbol->m_directive_offset; - - memset (&dirvar, 0, sizeof (dirvar)); - dirvar.base.byteCount = lendian16 (sizeof (dirvar)); - dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); - dirvar.allocation = symbol->m_allocation; - - char prefix = symbol->m_global_scope_p ? 
'&' : '%'; - - if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) - { - if (res_name_offset == 0) - res_name_offset = brig_emit_string (symbol->m_name, '%'); - name_offset = res_name_offset; - } - else if (symbol->m_name) - name_offset = brig_emit_string (symbol->m_name, prefix); - else - { - char buf[64]; - snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), - symbol->m_name_number); - name_offset = brig_emit_string (buf, prefix); - } - - dirvar.name = lendian32 (name_offset); - - if (symbol->m_decl && TREE_CODE (symbol->m_decl) == CONST_DECL) - { - hsa_op_immed *tmp = new hsa_op_immed (DECL_INITIAL (symbol->m_decl)); - dirvar.init = lendian32 (enqueue_op (tmp)); - } - else - dirvar.init = 0; - dirvar.type = lendian16 (symbol->m_type); - dirvar.segment = symbol->m_segment; - dirvar.align = symbol->m_align; - dirvar.linkage = symbol->m_linkage; - dirvar.dim.lo = symbol->m_dim; - dirvar.dim.hi = symbol->m_dim >> 32; - - /* Global variables are just declared and linked via HSA runtime. */ - if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) - dirvar.modifier |= BRIG_VARIABLE_DEFINITION; - dirvar.reserved = 0; - - if (symbol->m_cst_value) - { - dirvar.modifier |= BRIG_VARIABLE_CONST; - dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); - } - - symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); - return symbol->m_directive_offset; -} - -/* Emit directives describing either a function declaration or definition F and - return the produced BrigDirectiveExecutable structure. The function does - not take into account any instructions when calculating nextModuleEntry - field of the produced BrigDirectiveExecutable structure so when emitting - actual definitions, this field needs to be updated after all of the function - is actually added to the code section. 
*/ - -static BrigDirectiveExecutable * -emit_function_directives (hsa_function_representation *f, bool is_declaration) -{ - struct BrigDirectiveExecutable fndir; - unsigned name_offset, inarg_off, scoped_off, next_toplev_off; - int count = 0; - void *ptr_to_fndir; - hsa_symbol *sym; - - if (!f->m_declaration_p) - for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++) - { - gcc_assert (!sym->m_emitted_to_brig); - sym->m_emitted_to_brig = true; - emit_directive_variable (sym); - brig_insn_count++; - } - - name_offset = brig_emit_string (f->m_name, '&'); - inarg_off = brig_code.total_size + sizeof (fndir) - + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0); - scoped_off = inarg_off - + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable); - - if (!f->m_declaration_p) - { - count += f->m_spill_symbols.length (); - count += f->m_private_variables.length (); - } - - next_toplev_off = scoped_off + count * sizeof (struct BrigDirectiveVariable); - - memset (&fndir, 0, sizeof (fndir)); - fndir.base.byteCount = lendian16 (sizeof (fndir)); - fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL - : BRIG_KIND_DIRECTIVE_FUNCTION); - fndir.name = lendian32 (name_offset); - fndir.inArgCount = lendian16 (f->m_input_args.length ()); - fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0); - fndir.firstInArg = lendian32 (inarg_off); - fndir.firstCodeBlockEntry = lendian32 (scoped_off); - fndir.nextModuleEntry = lendian32 (next_toplev_off); - fndir.linkage = f->get_linkage (); - if (!f->m_declaration_p) - fndir.modifier |= BRIG_EXECUTABLE_DEFINITION; - memset (&fndir.reserved, 0, sizeof (fndir.reserved)); - - /* Once we put a definition of function_offsets, we should not overwrite - it with a declaration of the function. */ - if (f->m_internal_fn == NULL) - { - if (!function_offsets->get (f->m_decl) || !is_declaration) - function_offsets->put (f->m_decl, brig_code.total_size); - } - else - { - /* Internal function. 
*/ - hsa_internal_fn **slot - = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT); - hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn); - int_fn->m_offset = brig_code.total_size; - *slot = int_fn; - } - - brig_code.add (&fndir, sizeof (fndir), &ptr_to_fndir); - - if (f->m_output_arg) - emit_directive_variable (f->m_output_arg); - for (unsigned i = 0; i < f->m_input_args.length (); i++) - emit_directive_variable (f->m_input_args[i]); - - if (!f->m_declaration_p) - { - for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++) - { - emit_directive_variable (sym); - brig_insn_count++; - } - for (unsigned i = 0; i < f->m_private_variables.length (); i++) - { - emit_directive_variable (f->m_private_variables[i]); - brig_insn_count++; - } - } - - return (BrigDirectiveExecutable *) ptr_to_fndir; -} - -/* Emit a label directive for the given HBB. We assume it is about to start on - the current offset in the code section. */ - -static void -emit_bb_label_directive (hsa_bb *hbb) -{ - struct BrigDirectiveLabel lbldir; - - lbldir.base.byteCount = lendian16 (sizeof (lbldir)); - lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); - char buf[32]; - snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), - hbb->m_index); - lbldir.name = lendian32 (brig_emit_string (buf, '@')); - - hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, - sizeof (lbldir)); - brig_insn_count++; -} - -/* Map a normal HSAIL type to the type of the equivalent BRIG operand - holding such, for constants and registers. 
*/ - -static BrigType16_t -regtype_for_type (BrigType16_t t) -{ - switch (t) - { - case BRIG_TYPE_B1: - return BRIG_TYPE_B1; - - case BRIG_TYPE_U8: - case BRIG_TYPE_U16: - case BRIG_TYPE_U32: - case BRIG_TYPE_S8: - case BRIG_TYPE_S16: - case BRIG_TYPE_S32: - case BRIG_TYPE_B8: - case BRIG_TYPE_B16: - case BRIG_TYPE_B32: - case BRIG_TYPE_F16: - case BRIG_TYPE_F32: - case BRIG_TYPE_U8X4: - case BRIG_TYPE_U16X2: - case BRIG_TYPE_S8X4: - case BRIG_TYPE_S16X2: - case BRIG_TYPE_F16X2: - return BRIG_TYPE_B32; - - case BRIG_TYPE_U64: - case BRIG_TYPE_S64: - case BRIG_TYPE_F64: - case BRIG_TYPE_B64: - case BRIG_TYPE_U8X8: - case BRIG_TYPE_U16X4: - case BRIG_TYPE_U32X2: - case BRIG_TYPE_S8X8: - case BRIG_TYPE_S16X4: - case BRIG_TYPE_S32X2: - case BRIG_TYPE_F16X4: - case BRIG_TYPE_F32X2: - return BRIG_TYPE_B64; - - case BRIG_TYPE_B128: - case BRIG_TYPE_U8X16: - case BRIG_TYPE_U16X8: - case BRIG_TYPE_U32X4: - case BRIG_TYPE_U64X2: - case BRIG_TYPE_S8X16: - case BRIG_TYPE_S16X8: - case BRIG_TYPE_S32X4: - case BRIG_TYPE_S64X2: - case BRIG_TYPE_F16X8: - case BRIG_TYPE_F32X4: - case BRIG_TYPE_F64X2: - return BRIG_TYPE_B128; - - default: - gcc_unreachable (); - } -} - -/* Return the length of the BRIG type TYPE that is going to be streamed out as - an immediate constant (so it must not be B1). 
*/ - -unsigned -hsa_get_imm_brig_type_len (BrigType16_t type) -{ - BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK; - BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK; - - switch (pack_type) - { - case BRIG_TYPE_PACK_NONE: - break; - case BRIG_TYPE_PACK_32: - return 4; - case BRIG_TYPE_PACK_64: - return 8; - case BRIG_TYPE_PACK_128: - return 16; - default: - gcc_unreachable (); - } - - switch (base_type) - { - case BRIG_TYPE_U8: - case BRIG_TYPE_S8: - case BRIG_TYPE_B8: - return 1; - case BRIG_TYPE_U16: - case BRIG_TYPE_S16: - case BRIG_TYPE_F16: - case BRIG_TYPE_B16: - return 2; - case BRIG_TYPE_U32: - case BRIG_TYPE_S32: - case BRIG_TYPE_F32: - case BRIG_TYPE_B32: - return 4; - case BRIG_TYPE_U64: - case BRIG_TYPE_S64: - case BRIG_TYPE_F64: - case BRIG_TYPE_B64: - return 8; - case BRIG_TYPE_B128: - return 16; - default: - gcc_unreachable (); - } -} - -/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission. - If NEED_LEN is not equal to zero, shrink or extend the value - to NEED_LEN bytes. Return how many bytes were written. 
*/ - -static int -emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) -{ - union hsa_bytes bytes; - - memset (&bytes, 0, sizeof (bytes)); - tree type = TREE_TYPE (value); - gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); - - unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; - if (INTEGRAL_TYPE_P (type) - || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) - switch (data_len) - { - case 1: - bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); - break; - case 2: - bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); - break; - case 4: - bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); - break; - case 8: - bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); - break; - default: - gcc_unreachable (); - } - else if (SCALAR_FLOAT_TYPE_P (type)) - { - if (data_len == 2) - { - sorry ("Support for HSA does not implement immediate 16 bit FPU " - "operands"); - return 2; - } - unsigned int_len = GET_MODE_SIZE (SCALAR_FLOAT_TYPE_MODE (type)); - /* There are always 32 bits in each long, no matter the size of - the hosts long. */ - long tmp[6]; - - real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); - - if (int_len == 4) - bytes.b32 = (uint32_t) tmp[0]; - else - { - bytes.b64 = (uint64_t)(uint32_t) tmp[1]; - bytes.b64 <<= 32; - bytes.b64 |= (uint32_t) tmp[0]; - } - } - else - gcc_unreachable (); - - int len; - if (need_len == 0) - len = data_len; - else - len = need_len; - - memcpy (data, &bytes, len); - return len; -} - -char * -hsa_op_immed::emit_to_buffer (unsigned *brig_repr_size) -{ - char *brig_repr; - *brig_repr_size = hsa_get_imm_brig_type_len (m_type); - - if (m_tree_value != NULL_TREE) - { - /* Update brig_repr_size for special tree values. 
*/ - if (TREE_CODE (m_tree_value) == STRING_CST) - *brig_repr_size = TREE_STRING_LENGTH (m_tree_value); - else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) - *brig_repr_size - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (m_tree_value))); - - unsigned total_len = *brig_repr_size; - - /* As we can have a constructor with fewer elements, fill the memory - with zeros. */ - brig_repr = XCNEWVEC (char, total_len); - char *p = brig_repr; - - if (TREE_CODE (m_tree_value) == VECTOR_CST) - { - /* Variable-length vectors aren't supported. */ - int i, num = VECTOR_CST_NELTS (m_tree_value).to_constant (); - for (i = 0; i < num; i++) - { - tree v = VECTOR_CST_ELT (m_tree_value, i); - unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); - total_len -= actual; - p += actual; - } - /* Vectors should have the exact size. */ - gcc_assert (total_len == 0); - } - else if (TREE_CODE (m_tree_value) == STRING_CST) - memcpy (brig_repr, TREE_STRING_POINTER (m_tree_value), - TREE_STRING_LENGTH (m_tree_value)); - else if (TREE_CODE (m_tree_value) == COMPLEX_CST) - { - gcc_assert (total_len % 2 == 0); - unsigned actual; - actual - = emit_immediate_scalar_to_buffer (TREE_REALPART (m_tree_value), p, - total_len / 2); - - gcc_assert (actual == total_len / 2); - p += actual; - - actual - = emit_immediate_scalar_to_buffer (TREE_IMAGPART (m_tree_value), p, - total_len / 2); - gcc_assert (actual == total_len / 2); - } - else if (TREE_CODE (m_tree_value) == CONSTRUCTOR) - { - unsigned len = CONSTRUCTOR_NELTS (m_tree_value); - for (unsigned i = 0; i < len; i++) - { - tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value; - unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); - total_len -= actual; - p += actual; - } - } - else - emit_immediate_scalar_to_buffer (m_tree_value, p, total_len); - } - else - { - hsa_bytes bytes; - - switch (*brig_repr_size) - { - case 1: - bytes.b8 = (uint8_t) m_int_value; - break; - case 2: - bytes.b16 = (uint16_t) m_int_value; - break; - case 4: - bytes.b32 = 
(uint32_t) m_int_value; - break; - case 8: - bytes.b64 = (uint64_t) m_int_value; - break; - default: - gcc_unreachable (); - } - - brig_repr = XNEWVEC (char, *brig_repr_size); - memcpy (brig_repr, &bytes, *brig_repr_size); - } - - return brig_repr; -} - -/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might - have been massaged to comply with various HSA/BRIG type requirements, so the - only important aspect of that is the length (because HSAIL might expect - smaller constants or become bit-data). The data should be represented - according to what is in the tree representation. */ - -static void -emit_immediate_operand (hsa_op_immed *imm) -{ - unsigned brig_repr_size; - char *brig_repr = imm->emit_to_buffer (&brig_repr_size); - struct BrigOperandConstantBytes out; - - memset (&out, 0, sizeof (out)); - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); - uint32_t byteCount = lendian32 (brig_repr_size); - out.type = lendian16 (imm->m_type); - out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); - brig_operand.add (&out, sizeof (out)); - brig_data.add (brig_repr, brig_repr_size); - brig_data.round_size_up (4); - - free (brig_repr); -} - -/* Emit a register BRIG operand REG. 
*/ - -static void -emit_register_operand (hsa_op_reg *reg) -{ - struct BrigOperandRegister out; - - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); - out.regNum = lendian32 (reg->m_hard_num); - - switch (regtype_for_type (reg->m_type)) - { - case BRIG_TYPE_B32: - out.regKind = BRIG_REGISTER_KIND_SINGLE; - break; - case BRIG_TYPE_B64: - out.regKind = BRIG_REGISTER_KIND_DOUBLE; - break; - case BRIG_TYPE_B128: - out.regKind = BRIG_REGISTER_KIND_QUAD; - break; - case BRIG_TYPE_B1: - out.regKind = BRIG_REGISTER_KIND_CONTROL; - break; - default: - gcc_unreachable (); - } - - brig_operand.add (&out, sizeof (out)); -} - -/* Emit an address BRIG operand ADDR. */ - -static void -emit_address_operand (hsa_op_address *addr) -{ - struct BrigOperandAddress out; - - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); - out.symbol = addr->m_symbol - ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; - out.reg = addr->m_reg ? lendian32 (enqueue_op (addr->m_reg)) : 0; - - if (sizeof (addr->m_imm_offset) == 8) - { - out.offset.lo = lendian32 (addr->m_imm_offset); - out.offset.hi = lendian32 (addr->m_imm_offset >> 32); - } - else - { - gcc_assert (sizeof (addr->m_imm_offset) == 4); - out.offset.lo = lendian32 (addr->m_imm_offset); - out.offset.hi = 0; - } - - brig_operand.add (&out, sizeof (out)); -} - -/* Emit a code reference operand REF. */ - -static void -emit_code_ref_operand (hsa_op_code_ref *ref) -{ - struct BrigOperandCodeRef out; - - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); - out.ref = lendian32 (ref->m_directive_offset); - brig_operand.add (&out, sizeof (out)); -} - -/* Emit a code list operand CODE_LIST. 
*/ - -static void -emit_code_list_operand (hsa_op_code_list *code_list) -{ - struct BrigOperandCodeList out; - unsigned args = code_list->m_offsets.length (); - - for (unsigned i = 0; i < args; i++) - gcc_assert (code_list->m_offsets[i]); - - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); - - uint32_t byteCount = lendian32 (4 * args); - - out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); - brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); - brig_data.round_size_up (4); - brig_operand.add (&out, sizeof (out)); -} - -/* Emit an operand list operand OPERAND_LIST. */ - -static void -emit_operand_list_operand (hsa_op_operand_list *operand_list) -{ - struct BrigOperandOperandList out; - unsigned args = operand_list->m_offsets.length (); - - for (unsigned i = 0; i < args; i++) - gcc_assert (operand_list->m_offsets[i]); - - out.base.byteCount = lendian16 (sizeof (out)); - out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); - - uint32_t byteCount = lendian32 (4 * args); - - out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); - brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); - brig_data.round_size_up (4); - brig_operand.add (&out, sizeof (out)); -} - -/* Emit all operands queued for writing. 
*/ - -static void -emit_queued_operands (void) -{ - for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) - { - gcc_assert (op->m_brig_op_offset == brig_operand.total_size); - if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) - emit_immediate_operand (imm); - else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) - emit_register_operand (reg); - else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) - emit_address_operand (addr); - else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) - emit_code_ref_operand (ref); - else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) - emit_code_list_operand (code_list); - else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) - emit_operand_list_operand (l); - else - gcc_unreachable (); - } -} - -/* Emit directives describing the function that is used for - a function declaration. */ - -static BrigDirectiveExecutable * -emit_function_declaration (tree decl) -{ - hsa_function_representation *f = hsa_generate_function_declaration (decl); - - BrigDirectiveExecutable *e = emit_function_directives (f, true); - emit_queued_operands (); - - delete f; - - return e; -} - -/* Emit directives describing the function that is used for - an internal function declaration. */ - -static BrigDirectiveExecutable * -emit_internal_fn_decl (hsa_internal_fn *fn) -{ - hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); - - BrigDirectiveExecutable *e = emit_function_directives (f, true); - emit_queued_operands (); - - delete f; - - return e; -} - -/* Enqueue all operands of INSN and return offset to BRIG data section - to list of operand offsets. */ - -static unsigned -emit_insn_operands (hsa_insn_basic *insn) -{ - auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> - operand_offsets; - - unsigned l = insn->operand_count (); - - /* We have N operands so use 4 * N for the byte_count. 
*/ - uint32_t byte_count = lendian32 (4 * l); - unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); - if (l > 0) - { - operand_offsets.safe_grow (l); - for (unsigned i = 0; i < l; i++) - operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); - - brig_data.add (operand_offsets.address (), - l * sizeof (BrigOperandOffset32_t)); - } - brig_data.round_size_up (4); - return offset; -} - -/* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset - to BRIG data section to list of operand offsets. */ - -static unsigned -emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, - hsa_op_base *op2 = NULL) -{ - auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> - operand_offsets; - - gcc_checking_assert (op0 != NULL); - operand_offsets.safe_push (enqueue_op (op0)); - - if (op1 != NULL) - { - operand_offsets.safe_push (enqueue_op (op1)); - if (op2 != NULL) - operand_offsets.safe_push (enqueue_op (op2)); - } - - unsigned l = operand_offsets.length (); - - /* We have N operands so use 4 * N for the byte_count. */ - uint32_t byte_count = lendian32 (4 * l); - - unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); - brig_data.add (operand_offsets.address (), - l * sizeof (BrigOperandOffset32_t)); - - brig_data.round_size_up (4); - - return offset; -} - -/* Emit an HSA memory instruction and all necessary directives, schedule - necessary operands for writing. */ - -static void -emit_memory_insn (hsa_insn_mem *mem) -{ - struct BrigInstMem repr; - gcc_checking_assert (mem->operand_count () == 2); - - hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); - - /* This is necessary because of the erroneous typedef of - BrigMemoryModifier8_t which introduces padding which may then contain - random stuff (which we do not want so that we can test things don't - change). 
*/ - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); - repr.base.opcode = lendian16 (mem->m_opcode); - repr.base.type = lendian16 (mem->m_type); - repr.base.operands = lendian32 (emit_insn_operands (mem)); - - if (addr->m_symbol) - repr.segment = addr->m_symbol->m_segment; - else - repr.segment = BRIG_SEGMENT_FLAT; - repr.modifier = 0; - repr.equivClass = mem->m_equiv_class; - repr.align = mem->m_align; - if (mem->m_opcode == BRIG_OPCODE_LD) - repr.width = BRIG_WIDTH_1; - else - repr.width = BRIG_WIDTH_NONE; - memset (&repr.reserved, 0, sizeof (repr.reserved)); - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA signal memory instruction and all necessary directives, schedule - necessary operands for writing. */ - -static void -emit_signal_insn (hsa_insn_signal *mem) -{ - struct BrigInstSignal repr; - - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); - repr.base.opcode = lendian16 (mem->m_opcode); - repr.base.type = lendian16 (mem->m_type); - repr.base.operands = lendian32 (emit_insn_operands (mem)); - - repr.memoryOrder = mem->m_memory_order; - repr.signalOperation = mem->m_signalop; - repr.signalType = hsa_machine_large_p () ? BRIG_TYPE_SIG64 : BRIG_TYPE_SIG32; - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA atomic memory instruction and all necessary directives, schedule - necessary operands for writing. */ - -static void -emit_atomic_insn (hsa_insn_atomic *mem) -{ - struct BrigInstAtomic repr; - - /* Either operand[0] or operand[1] must be an address operand. 
*/ - hsa_op_address *addr = NULL; - if (is_a <hsa_op_address *> (mem->get_op (0))) - addr = as_a <hsa_op_address *> (mem->get_op (0)); - else - addr = as_a <hsa_op_address *> (mem->get_op (1)); - - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); - repr.base.opcode = lendian16 (mem->m_opcode); - repr.base.type = lendian16 (mem->m_type); - repr.base.operands = lendian32 (emit_insn_operands (mem)); - - if (addr->m_symbol) - repr.segment = addr->m_symbol->m_segment; - else - repr.segment = BRIG_SEGMENT_FLAT; - repr.memoryOrder = mem->m_memoryorder; - repr.memoryScope = mem->m_memoryscope; - repr.atomicOperation = mem->m_atomicop; - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA LDA instruction and all necessary directives, schedule - necessary operands for writing. */ - -static void -emit_addr_insn (hsa_insn_basic *insn) -{ - struct BrigInstAddr repr; - - hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); - repr.base.opcode = lendian16 (insn->m_opcode); - repr.base.type = lendian16 (insn->m_type); - repr.base.operands = lendian32 (emit_insn_operands (insn)); - - if (addr->m_symbol) - repr.segment = addr->m_symbol->m_segment; - else - repr.segment = BRIG_SEGMENT_FLAT; - memset (&repr.reserved, 0, sizeof (repr.reserved)); - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA segment conversion instruction and all necessary directives, - schedule necessary operands for writing. 
*/ - -static void -emit_segment_insn (hsa_insn_seg *seg) -{ - struct BrigInstSegCvt repr; - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); - repr.base.opcode = lendian16 (seg->m_opcode); - repr.base.type = lendian16 (seg->m_type); - repr.base.operands = lendian32 (emit_insn_operands (seg)); - repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); - repr.segment = seg->m_segment; - repr.modifier = 0; - - brig_code.add (&repr, sizeof (repr)); - - brig_insn_count++; -} - -/* Emit an HSA alloca instruction and all necessary directives, - schedule necessary operands for writing. */ - -static void -emit_alloca_insn (hsa_insn_alloca *alloca) -{ - struct BrigInstMem repr; - gcc_checking_assert (alloca->operand_count () == 2); - - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); - repr.base.opcode = lendian16 (alloca->m_opcode); - repr.base.type = lendian16 (alloca->m_type); - repr.base.operands = lendian32 (emit_insn_operands (alloca)); - repr.segment = BRIG_SEGMENT_PRIVATE; - repr.modifier = 0; - repr.equivClass = 0; - repr.align = alloca->m_align; - repr.width = BRIG_WIDTH_NONE; - memset (&repr.reserved, 0, sizeof (repr.reserved)); - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA comparison instruction and all necessary directives, - schedule necessary operands for writing. 
*/ - -static void -emit_cmp_insn (hsa_insn_cmp *cmp) -{ - struct BrigInstCmp repr; - - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); - repr.base.opcode = lendian16 (cmp->m_opcode); - repr.base.type = lendian16 (cmp->m_type); - repr.base.operands = lendian32 (emit_insn_operands (cmp)); - - if (is_a <hsa_op_reg *> (cmp->get_op (1))) - repr.sourceType - = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); - else - repr.sourceType - = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); - repr.modifier = 0; - repr.compare = cmp->m_compare; - repr.pack = 0; - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA generic branching/sycnronization instruction. */ - -static void -emit_generic_branch_insn (hsa_insn_br *br) -{ - struct BrigInstBr repr; - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); - repr.base.opcode = lendian16 (br->m_opcode); - repr.width = br->m_width; - repr.base.type = lendian16 (br->m_type); - repr.base.operands = lendian32 (emit_insn_operands (br)); - memset (&repr.reserved, 0, sizeof (repr.reserved)); - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA conditional branching instruction and all necessary directives, - schedule necessary operands for writing. */ - -static void -emit_cond_branch_insn (hsa_insn_cbr *br) -{ - struct BrigInstBr repr; - - basic_block target = NULL; - edge_iterator ei; - edge e; - - /* At the moment we only handle direct conditional jumps. */ - gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); - repr.base.opcode = lendian16 (br->m_opcode); - repr.width = br->m_width; - /* For Conditional jumps the type is always B1. 
*/ - repr.base.type = lendian16 (BRIG_TYPE_B1); - - FOR_EACH_EDGE (e, ei, br->m_bb->succs) - if (e->flags & EDGE_TRUE_VALUE) - { - target = e->dest; - break; - } - gcc_assert (target); - - repr.base.operands - = lendian32 (emit_operands (br->get_op (0), - &hsa_bb_for_bb (target)->m_label_ref)); - memset (&repr.reserved, 0, sizeof (repr.reserved)); - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA unconditional jump branching instruction that points to - a label REFERENCE. */ - -static void -emit_unconditional_jump (hsa_op_code_ref *reference) -{ - struct BrigInstBr repr; - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); - repr.base.opcode = lendian16 (BRIG_OPCODE_BR); - repr.base.type = lendian16 (BRIG_TYPE_NONE); - /* Direct branches to labels must be width(all). */ - repr.width = BRIG_WIDTH_ALL; - - repr.base.operands = lendian32 (emit_operands (reference)); - memset (&repr.reserved, 0, sizeof (repr.reserved)); - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit an HSA switch jump instruction that uses a jump table to - jump to a destination label. */ - -static void -emit_switch_insn (hsa_insn_sbr *sbr) -{ - struct BrigInstBr repr; - - gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); - repr.base.opcode = lendian16 (sbr->m_opcode); - repr.width = BRIG_WIDTH_1; - /* For Conditional jumps the type is always B1. */ - hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); - repr.base.type = lendian16 (index->m_type); - repr.base.operands - = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); - memset (&repr.reserved, 0, sizeof (repr.reserved)); - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit a HSA convert instruction and all necessary directives, schedule - necessary operands for writing. 
*/ - -static void -emit_cvt_insn (hsa_insn_cvt *insn) -{ - struct BrigInstCvt repr; - BrigType16_t srctype; - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); - repr.base.opcode = lendian16 (insn->m_opcode); - repr.base.type = lendian16 (insn->m_type); - repr.base.operands = lendian32 (emit_insn_operands (insn)); - - if (is_a <hsa_op_reg *> (insn->get_op (1))) - srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; - else - srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; - repr.sourceType = lendian16 (srctype); - repr.modifier = 0; - /* float to smaller float requires a rounding setting (we default - to 'near'. */ - if (hsa_type_float_p (insn->m_type) - && (!hsa_type_float_p (srctype) - || ((insn->m_type & BRIG_TYPE_BASE_MASK) - < (srctype & BRIG_TYPE_BASE_MASK)))) - repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; - else if (hsa_type_integer_p (insn->m_type) && - hsa_type_float_p (srctype)) - repr.round = BRIG_ROUND_INTEGER_ZERO; - else - repr.round = BRIG_ROUND_NONE; - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit call instruction INSN, where this instruction must be closed - within a call block instruction. */ - -static void -emit_call_insn (hsa_insn_call *call) -{ - struct BrigInstBr repr; - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); - repr.base.opcode = lendian16 (BRIG_OPCODE_CALL); - repr.base.type = lendian16 (BRIG_TYPE_NONE); - - repr.base.operands - = lendian32 (emit_operands (call->m_result_code_list, &call->m_func, - call->m_args_code_list)); - - /* Internal functions have not set m_called_function. 
*/ - if (call->m_called_function) - { - function_linkage_pair pair (call->m_called_function, - call->m_func.m_brig_op_offset); - function_call_linkage.safe_push (pair); - } - else - { - hsa_internal_fn *slot - = hsa_emitted_internal_decls->find (call->m_called_internal_fn); - gcc_assert (slot); - gcc_assert (slot->m_offset > 0); - call->m_func.m_directive_offset = slot->m_offset; - } - - repr.width = BRIG_WIDTH_ALL; - memset (&repr.reserved, 0, sizeof (repr.reserved)); - - brig_code.add (&repr, sizeof (repr)); - brig_insn_count++; -} - -/* Emit argument block directive. */ - -static void -emit_arg_block_insn (hsa_insn_arg_block *insn) -{ - switch (insn->m_kind) - { - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: - { - struct BrigDirectiveArgBlock repr; - repr.base.byteCount = lendian16 (sizeof (repr)); - repr.base.kind = lendian16 (insn->m_kind); - brig_code.add (&repr, sizeof (repr)); - - for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) - { - insn->m_call_insn->m_args_code_list->m_offsets[i] - = lendian32 (emit_directive_variable - (insn->m_call_insn->m_input_args[i])); - brig_insn_count++; - } - - if (insn->m_call_insn->m_output_arg) - { - insn->m_call_insn->m_result_code_list->m_offsets[0] - = lendian32 (emit_directive_variable - (insn->m_call_insn->m_output_arg)); - brig_insn_count++; - } - - break; - } - case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: - { - struct BrigDirectiveArgBlock repr; - repr.base.byteCount = lendian16 (sizeof (repr)); - repr.base.kind = lendian16 (insn->m_kind); - brig_code.add (&repr, sizeof (repr)); - break; - } - default: - gcc_unreachable (); - } - - brig_insn_count++; -} - -/* Emit comment directive. 
*/ - -static void -emit_comment_insn (hsa_insn_comment *insn) -{ - struct BrigDirectiveComment repr; - memset (&repr, 0, sizeof (repr)); - - repr.base.byteCount = lendian16 (sizeof (repr)); - repr.base.kind = lendian16 (insn->m_opcode); - repr.name = brig_emit_string (insn->m_comment, '\0', false); - brig_code.add (&repr, sizeof (repr)); -} - -/* Emit queue instruction INSN. */ - -static void -emit_queue_insn (hsa_insn_queue *insn) -{ - BrigInstQueue repr; - memset (&repr, 0, sizeof (repr)); - - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); - repr.base.opcode = lendian16 (insn->m_opcode); - repr.base.type = lendian16 (insn->m_type); - repr.segment = insn->m_segment; - repr.memoryOrder = insn->m_memory_order; - repr.base.operands = lendian32 (emit_insn_operands (insn)); - brig_data.round_size_up (4); - brig_code.add (&repr, sizeof (repr)); - - brig_insn_count++; -} - -/* Emit source type instruction INSN. */ - -static void -emit_srctype_insn (hsa_insn_srctype *insn) -{ - /* We assume that BrigInstMod has a BrigInstBasic prefix. */ - struct BrigInstSourceType repr; - unsigned operand_count = insn->operand_count (); - gcc_checking_assert (operand_count >= 2); - - memset (&repr, 0, sizeof (repr)); - repr.sourceType = lendian16 (insn->m_source_type); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); - repr.base.opcode = lendian16 (insn->m_opcode); - repr.base.type = lendian16 (insn->m_type); - - repr.base.operands = lendian32 (emit_insn_operands (insn)); - brig_code.add (&repr, sizeof (struct BrigInstSourceType)); - brig_insn_count++; -} - -/* Emit packed instruction INSN. */ - -static void -emit_packed_insn (hsa_insn_packed *insn) -{ - /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ - struct BrigInstSourceType repr; - unsigned operand_count = insn->operand_count (); - gcc_checking_assert (operand_count >= 2); - - memset (&repr, 0, sizeof (repr)); - repr.sourceType = lendian16 (insn->m_source_type); - repr.base.base.byteCount = lendian16 (sizeof (repr)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); - repr.base.opcode = lendian16 (insn->m_opcode); - repr.base.type = lendian16 (insn->m_type); - - if (insn->m_opcode == BRIG_OPCODE_COMBINE) - { - /* Create operand list for packed type. */ - for (unsigned i = 1; i < operand_count; i++) - { - gcc_checking_assert (insn->get_op (i)); - insn->m_operand_list->m_offsets[i - 1] - = lendian32 (enqueue_op (insn->get_op (i))); - } - - repr.base.operands = lendian32 (emit_operands (insn->get_op (0), - insn->m_operand_list)); - } - else if (insn->m_opcode == BRIG_OPCODE_EXPAND) - { - /* Create operand list for packed type. */ - for (unsigned i = 0; i < operand_count - 1; i++) - { - gcc_checking_assert (insn->get_op (i)); - insn->m_operand_list->m_offsets[i] - = lendian32 (enqueue_op (insn->get_op (i))); - } - - unsigned ops = emit_operands (insn->m_operand_list, - insn->get_op (insn->operand_count () - 1)); - repr.base.operands = lendian32 (ops); - } - - - brig_code.add (&repr, sizeof (struct BrigInstSourceType)); - brig_insn_count++; -} - -/* Emit a basic HSA instruction and all necessary directives, schedule - necessary operands for writing. */ - -static void -emit_basic_insn (hsa_insn_basic *insn) -{ - /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ - struct BrigInstMod repr; - BrigType16_t type; - - memset (&repr, 0, sizeof (repr)); - repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); - repr.base.opcode = lendian16 (insn->m_opcode); - switch (insn->m_opcode) - { - /* And the bit-logical operations need bit types and whine about - arithmetic types :-/ */ - case BRIG_OPCODE_AND: - case BRIG_OPCODE_OR: - case BRIG_OPCODE_XOR: - case BRIG_OPCODE_NOT: - type = regtype_for_type (insn->m_type); - break; - default: - type = insn->m_type; - break; - } - repr.base.type = lendian16 (type); - repr.base.operands = lendian32 (emit_insn_operands (insn)); - - if (hsa_type_packed_p (type)) - { - if (hsa_type_float_p (type) - && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) - repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; - else - repr.round = 0; - /* We assume that destination and sources agree in packing layout. */ - if (insn->num_used_ops () >= 2) - repr.pack = BRIG_PACK_PP; - else - repr.pack = BRIG_PACK_P; - repr.reserved = 0; - repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); - repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); - brig_code.add (&repr, sizeof (struct BrigInstMod)); - } - else - brig_code.add (&repr, sizeof (struct BrigInstBasic)); - brig_insn_count++; -} - -/* Emit an HSA instruction and all necessary directives, schedule necessary - operands for writing. 
*/

static void
emit_insn (hsa_insn_basic *insn)
{
  gcc_assert (!is_a <hsa_insn_phi *> (insn));

  /* Record where in the code section this instruction starts.  */
  insn->m_brig_offset = brig_code.total_size;

  /* Dispatch on the dynamic class of INSN.  The order of the tests is
     significant and must be kept: the first matching test wins.  */
  if (hsa_insn_signal *signal = dyn_cast <hsa_insn_signal *> (insn))
    {
      emit_signal_insn (signal);
      return;
    }
  if (hsa_insn_atomic *atom = dyn_cast <hsa_insn_atomic *> (insn))
    {
      emit_atomic_insn (atom);
      return;
    }
  if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn))
    {
      emit_memory_insn (mem);
      return;
    }
  if (insn->m_opcode == BRIG_OPCODE_LDA)
    {
      emit_addr_insn (insn);
      return;
    }
  if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn))
    {
      emit_segment_insn (seg);
      return;
    }
  if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn))
    {
      emit_cmp_insn (cmp);
      return;
    }
  if (hsa_insn_cbr *cbr = dyn_cast <hsa_insn_cbr *> (insn))
    {
      emit_cond_branch_insn (cbr);
      return;
    }
  if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn))
    {
      /* Remember every switch so that its label offsets can be filled
         in later.  */
      if (switch_instructions == NULL)
        switch_instructions = new vec <hsa_insn_sbr *> ();

      switch_instructions->safe_push (sbr);
      emit_switch_insn (sbr);
      return;
    }
  if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn))
    {
      emit_generic_branch_insn (br);
      return;
    }
  if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn))
    {
      emit_arg_block_insn (block);
      return;
    }
  if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn))
    {
      emit_call_insn (call);
      return;
    }
  if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn))
    {
      emit_comment_insn (comment);
      return;
    }
  if (hsa_insn_queue *queue = dyn_cast <hsa_insn_queue *> (insn))
    {
      emit_queue_insn (queue);
      return;
    }
  if (hsa_insn_srctype *srctype = dyn_cast <hsa_insn_srctype *> (insn))
    {
      emit_srctype_insn (srctype);
      return;
    }
  if (hsa_insn_packed *packed = dyn_cast <hsa_insn_packed *> (insn))
    {
      emit_packed_insn (packed);
      return;
    }
  if (hsa_insn_cvt *cvt = dyn_cast <hsa_insn_cvt *> (insn))
    {
      emit_cvt_insn (cvt);
      return;
    }
  if (hsa_insn_alloca *alloca = dyn_cast <hsa_insn_alloca *> (insn))
    {
      emit_alloca_insn (alloca);
      return;
    }

  /* Everything else is a basic instruction.  */
  emit_basic_insn (insn);
}

/* We have just finished emitting BB and
are about to emit NEXT_BB if non-NULL, - or we are about to finish emitting code, if it is NULL. If the fall through - edge from BB does not lead to NEXT_BB, emit an unconditional jump. */ - -static void -perhaps_emit_branch (basic_block bb, basic_block next_bb) -{ - basic_block t_bb = NULL, ff = NULL; - - edge_iterator ei; - edge e; - - /* If the last instruction of BB is a switch, ignore emission of all - edges. */ - if (hsa_bb_for_bb (bb)->m_last_insn - && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) - return; - - FOR_EACH_EDGE (e, ei, bb->succs) - if (e->flags & EDGE_TRUE_VALUE) - { - gcc_assert (!t_bb); - t_bb = e->dest; - } - else - { - gcc_assert (!ff); - ff = e->dest; - } - - if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) - return; - - emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); -} - -/* Emit the a function with name NAME to the various brig sections. */ - -void -hsa_brig_emit_function (void) -{ - basic_block bb, prev_bb; - hsa_insn_basic *insn; - BrigDirectiveExecutable *ptr_to_fndir; - - brig_init (); - - brig_insn_count = 0; - memset (&op_queue, 0, sizeof (op_queue)); - op_queue.projected_size = brig_operand.total_size; - - if (!function_offsets) - function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); - - if (!emitted_declarations) - emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); - - for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) - { - tree called = hsa_cfun->m_called_functions[i]; - - /* If the function has no definition, emit a declaration. 
*/ - if (!emitted_declarations->get (called)) - { - BrigDirectiveExecutable *e = emit_function_declaration (called); - emitted_declarations->put (called, e); - } - } - - for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) - { - hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; - emit_internal_fn_decl (called); - } - - ptr_to_fndir = emit_function_directives (hsa_cfun, false); - for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; - insn; - insn = insn->m_next) - emit_insn (insn); - prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); - FOR_EACH_BB_FN (bb, cfun) - { - perhaps_emit_branch (prev_bb, bb); - emit_bb_label_directive (hsa_bb_for_bb (bb)); - for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) - emit_insn (insn); - prev_bb = bb; - } - perhaps_emit_branch (prev_bb, NULL); - ptr_to_fndir->nextModuleEntry = lendian32 (brig_code.total_size); - - /* Fill up label references for all sbr instructions. */ - if (switch_instructions) - { - for (unsigned i = 0; i < switch_instructions->length (); i++) - { - hsa_insn_sbr *sbr = (*switch_instructions)[i]; - for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) - { - hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); - sbr->m_label_code_list->m_offsets[j] - = hbb->m_label_ref.m_directive_offset; - } - } - - switch_instructions->release (); - delete switch_instructions; - switch_instructions = NULL; - } - - if (dump_file) - { - fprintf (dump_file, "------- After BRIG emission: -------\n"); - dump_hsa_cfun (dump_file); - } - - emit_queued_operands (); -} - -/* Emit all OMP symbols related to OMP. */ - -void -hsa_brig_emit_omp_symbols (void) -{ - brig_init (); - emit_directive_variable (hsa_num_threads); -} - -/* Create and return __hsa_global_variables symbol that contains - all informations consumed by libgomp to link global variables - with their string names used by an HSA kernel. 
*/ - -static tree -hsa_output_global_variables () -{ - unsigned l = hsa_global_variable_symbols->elements (); - - tree variable_info_type = make_node (RECORD_TYPE); - tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("name"), ptr_type_node); - DECL_CHAIN (id_f1) = NULL_TREE; - tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("omp_data_size"), - ptr_type_node); - DECL_CHAIN (id_f2) = id_f1; - finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, - NULL_TREE); - - tree int_num_of_global_vars; - int_num_of_global_vars = build_int_cst (uint32_type_node, l); - tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); - tree global_vars_array_type = build_array_type (variable_info_type, - global_vars_num_index_type); - TYPE_ARTIFICIAL (global_vars_array_type) = 1; - - vec<constructor_elt, va_gc> *global_vars_vec = NULL; - - for (hash_table <hsa_noop_symbol_hasher>::iterator it - = hsa_global_variable_symbols->begin (); - it != hsa_global_variable_symbols->end (); ++it) - { - unsigned len = strlen ((*it)->m_name); - char *copy = XNEWVEC (char, len + 2); - copy[0] = '&'; - memcpy (copy + 1, (*it)->m_name, len); - copy[len + 1] = '\0'; - len++; - hsa_sanitize_name (copy); - - tree var_name = build_string (len, copy); - TREE_TYPE (var_name) - = build_array_type (char_type_node, build_index_type (size_int (len))); - free (copy); - - vec<constructor_elt, va_gc> *variable_info_vec = NULL; - CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, - build1 (ADDR_EXPR, - build_pointer_type (TREE_TYPE (var_name)), - var_name)); - CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, - build_fold_addr_expr ((*it)->m_decl)); - - tree variable_info_ctor = build_constructor (variable_info_type, - variable_info_vec); - - CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, - variable_info_ctor); - } - - tree global_vars_ctor = build_constructor (global_vars_array_type, - global_vars_vec); - - char 
tmp_name[64]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); - tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - global_vars_array_type); - TREE_STATIC (global_vars_table) = 1; - TREE_READONLY (global_vars_table) = 1; - TREE_PUBLIC (global_vars_table) = 0; - DECL_ARTIFICIAL (global_vars_table) = 1; - DECL_IGNORED_P (global_vars_table) = 1; - DECL_EXTERNAL (global_vars_table) = 0; - TREE_CONSTANT (global_vars_table) = 1; - DECL_INITIAL (global_vars_table) = global_vars_ctor; - varpool_node::finalize_decl (global_vars_table); - - return global_vars_table; -} - -/* Create __hsa_host_functions and __hsa_kernels that contain - all informations consumed by libgomp to register all kernels - in the BRIG binary. */ - -static void -hsa_output_kernels (tree *host_func_table, tree *kernels) -{ - unsigned map_count = hsa_get_number_decl_kernel_mappings (); - - tree int_num_of_kernels; - int_num_of_kernels = build_int_cst (uint32_type_node, map_count); - tree kernel_num_index_type = build_index_type (int_num_of_kernels); - tree host_functions_array_type = build_array_type (ptr_type_node, - kernel_num_index_type); - TYPE_ARTIFICIAL (host_functions_array_type) = 1; - - vec<constructor_elt, va_gc> *host_functions_vec = NULL; - for (unsigned i = 0; i < map_count; ++i) - { - tree decl = hsa_get_decl_kernel_mapping_decl (i); - tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl)); - CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn); - } - tree host_functions_ctor = build_constructor (host_functions_array_type, - host_functions_vec); - char tmp_name[64]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1); - tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - host_functions_array_type); - TREE_STATIC (hsa_host_func_table) = 1; - TREE_READONLY (hsa_host_func_table) = 1; - TREE_PUBLIC (hsa_host_func_table) = 0; - DECL_ARTIFICIAL 
(hsa_host_func_table) = 1; - DECL_IGNORED_P (hsa_host_func_table) = 1; - DECL_EXTERNAL (hsa_host_func_table) = 0; - TREE_CONSTANT (hsa_host_func_table) = 1; - DECL_INITIAL (hsa_host_func_table) = host_functions_ctor; - varpool_node::finalize_decl (hsa_host_func_table); - *host_func_table = hsa_host_func_table; - - /* Following code emits list of kernel_info structures. */ - - tree kernel_info_type = make_node (RECORD_TYPE); - tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("name"), ptr_type_node); - DECL_CHAIN (id_f1) = NULL_TREE; - tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("omp_data_size"), - unsigned_type_node); - DECL_CHAIN (id_f2) = id_f1; - tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("gridified_kernel_p"), - boolean_type_node); - DECL_CHAIN (id_f3) = id_f2; - tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_dependencies_count"), - unsigned_type_node); - DECL_CHAIN (id_f4) = id_f3; - tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_dependencies"), - build_pointer_type (build_pointer_type - (char_type_node))); - DECL_CHAIN (id_f5) = id_f4; - finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5, - NULL_TREE); - - int_num_of_kernels = build_int_cstu (uint32_type_node, map_count); - tree kernel_info_vector_type - = build_array_type (kernel_info_type, - build_index_type (int_num_of_kernels)); - TYPE_ARTIFICIAL (kernel_info_vector_type) = 1; - - vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL; - tree kernel_dependencies_vector_type = NULL; - - for (unsigned i = 0; i < map_count; ++i) - { - tree kernel = hsa_get_decl_kernel_mapping_decl (i); - char *name = hsa_get_decl_kernel_mapping_name (i); - unsigned len = strlen (name); - char *copy = XNEWVEC (char, len + 2); - copy[0] = '&'; - memcpy (copy + 1, name, len); - copy[len + 1] = '\0'; - len++; - - tree kern_name = build_string (len, copy); 
- TREE_TYPE (kern_name) - = build_array_type (char_type_node, build_index_type (size_int (len))); - free (copy); - - unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i); - tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size); - bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i); - tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node, - gridified_kernel_p); - unsigned count = 0; - vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL; - if (hsa_decl_kernel_dependencies) - { - vec<const char *> **slot; - slot = hsa_decl_kernel_dependencies->get (kernel); - if (slot) - { - vec <const char *> *dependencies = *slot; - count = dependencies->length (); - - kernel_dependencies_vector_type - = build_array_type (build_pointer_type (char_type_node), - build_index_type (size_int (count))); - TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1; - - for (unsigned j = 0; j < count; j++) - { - const char *d = (*dependencies)[j]; - len = strlen (d); - tree dependency_name = build_string (len, d); - TREE_TYPE (dependency_name) - = build_array_type (char_type_node, - build_index_type (size_int (len))); - - CONSTRUCTOR_APPEND_ELT - (kernel_dependencies_vec, NULL_TREE, - build1 (ADDR_EXPR, - build_pointer_type (TREE_TYPE (dependency_name)), - dependency_name)); - } - } - } - - tree dependencies_count = build_int_cstu (unsigned_type_node, count); - - vec<constructor_elt, va_gc> *kernel_info_vec = NULL; - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, - build1 (ADDR_EXPR, - build_pointer_type (TREE_TYPE - (kern_name)), - kern_name)); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, - gridified_kernel_p_tree); - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count); - - if (count > 0) - { - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i); - gcc_checking_assert (kernel_dependencies_vector_type); - tree 
dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - kernel_dependencies_vector_type); - - TREE_STATIC (dependencies_list) = 1; - TREE_READONLY (dependencies_list) = 1; - TREE_PUBLIC (dependencies_list) = 0; - DECL_ARTIFICIAL (dependencies_list) = 1; - DECL_IGNORED_P (dependencies_list) = 1; - DECL_EXTERNAL (dependencies_list) = 0; - TREE_CONSTANT (dependencies_list) = 1; - DECL_INITIAL (dependencies_list) - = build_constructor (kernel_dependencies_vector_type, - kernel_dependencies_vec); - varpool_node::finalize_decl (dependencies_list); - - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, - build1 (ADDR_EXPR, - build_pointer_type - (TREE_TYPE (dependencies_list)), - dependencies_list)); - } - else - CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node); - - tree kernel_info_ctor = build_constructor (kernel_info_type, - kernel_info_vec); - - CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE, - kernel_info_ctor); - } - - ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1); - tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL, - get_identifier (tmp_name), - kernel_info_vector_type); - - TREE_STATIC (hsa_kernels) = 1; - TREE_READONLY (hsa_kernels) = 1; - TREE_PUBLIC (hsa_kernels) = 0; - DECL_ARTIFICIAL (hsa_kernels) = 1; - DECL_IGNORED_P (hsa_kernels) = 1; - DECL_EXTERNAL (hsa_kernels) = 0; - TREE_CONSTANT (hsa_kernels) = 1; - DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type, - kernel_info_vector_vec); - varpool_node::finalize_decl (hsa_kernels); - *kernels = hsa_kernels; -} - -/* Create a static constructor that will register out brig stuff with - libgomp. 
*/

static void
hsa_output_libgomp_mapping (tree brig_decl)
{
  unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
  unsigned global_variable_count = hsa_global_variable_symbols->elements ();

  tree kernels;
  tree host_func_table;
  hsa_output_kernels (&host_func_table, &kernels);
  tree global_vars = hsa_output_global_variables ();

  /* Record type __hsa_image_desc.  The fields are chained last-to-first
     and the last one is handed to finish_builtin_struct.  */
  tree hsa_image_desc_type = make_node (RECORD_TYPE);
  tree module_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                  get_identifier ("brig_module"),
                                  ptr_type_node);
  DECL_CHAIN (module_field) = NULL_TREE;
  tree kernel_count_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                        get_identifier ("kernel_count"),
                                        unsigned_type_node);
  DECL_CHAIN (kernel_count_field) = module_field;
  tree kernel_infos_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                                        get_identifier ("hsa_kernel_infos"),
                                        ptr_type_node);
  DECL_CHAIN (kernel_infos_field) = kernel_count_field;
  tree var_count_field
    = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                  get_identifier ("global_variable_count"),
                  unsigned_type_node);
  DECL_CHAIN (var_count_field) = kernel_infos_field;
  tree var_infos_field
    = build_decl (BUILTINS_LOCATION, FIELD_DECL,
                  get_identifier ("hsa_global_variable_infos"),
                  ptr_type_node);
  DECL_CHAIN (var_infos_field) = var_count_field;
  finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc",
                         var_infos_field, NULL_TREE);
  TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;

  /* Initializer of the image descriptor, in field order.  */
  vec<constructor_elt, va_gc> *img_desc_vec = NULL;
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
                          build_fold_addr_expr (brig_decl));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
                          build_int_cstu (unsigned_type_node, kernel_count));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
                          build1 (ADDR_EXPR,
                                  build_pointer_type (TREE_TYPE (kernels)),
                                  kernels));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
                          build_int_cstu (unsigned_type_node,
                                          global_variable_count));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
                          build1 (ADDR_EXPR,
                                  build_pointer_type (TREE_TYPE (global_vars)),
                                  global_vars));

  tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);

  char tmp_name[64];
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
  tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                        get_identifier (tmp_name),
                                        hsa_image_desc_type);
  TREE_STATIC (hsa_img_descriptor) = 1;
  TREE_READONLY (hsa_img_descriptor) = 1;
  TREE_PUBLIC (hsa_img_descriptor) = 0;
  DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
  DECL_IGNORED_P (hsa_img_descriptor) = 1;
  DECL_EXTERNAL (hsa_img_descriptor) = 0;
  TREE_CONSTANT (hsa_img_descriptor) = 1;
  DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
  varpool_node::finalize_decl (hsa_img_descriptor);

  /* Construct the "host_table" libgomp expects: the start of the host
     function table, the address just past its KERNEL_COUNT pointers, and
     two null pointers.  */
  tree index_type = build_index_type (build_int_cst (integer_type_node, 4));
  tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
  TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;

  vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;
  tree host_func_table_addr = build_fold_addr_expr (host_func_table);
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
                          host_func_table_addr);

  offset_int func_table_size
    = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
  tree host_func_table_end
    = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (host_func_table_addr),
                   host_func_table_addr,
                   build_int_cst (size_type_node,
                                  func_table_size.to_uhwi ()));
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
                          host_func_table_end);
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);

  tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
                                                    libgomp_host_table_vec);
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
  tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
                                            get_identifier (tmp_name),
                                            libgomp_host_table_type);
  TREE_STATIC (hsa_libgomp_host_table) = 1;
  TREE_READONLY (hsa_libgomp_host_table) = 1;
  TREE_PUBLIC (hsa_libgomp_host_table) = 0;
  DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
  DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
  DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
  TREE_CONSTANT (hsa_libgomp_host_table) = 1;
  DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
  varpool_node::finalize_decl (hsa_libgomp_host_table);

  /* Static constructor: call the libgomp registration routine.  */
  tree offload_register
    = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
  gcc_checking_assert (offload_register);

  tree *hsa_ctor_stmts = hsa_get_ctor_statements ();
  append_to_statement_list
    (build_call_expr (offload_register, 4,
                      build_int_cstu (unsigned_type_node,
                                      GOMP_VERSION_PACK (GOMP_VERSION,
                                                         GOMP_VERSION_HSA)),
                      build_fold_addr_expr (hsa_libgomp_host_table),
                      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
                      build_fold_addr_expr (hsa_img_descriptor)),
     hsa_ctor_stmts);
  cgraph_build_static_cdtor ('I', *hsa_ctor_stmts, DEFAULT_INIT_PRIORITY);

  /* Static destructor: call the matching unregistration routine with
     the same arguments.  */
  tree offload_unregister
    = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
  gcc_checking_assert (offload_unregister);

  tree *hsa_dtor_stmts = hsa_get_dtor_statements ();
  append_to_statement_list
    (build_call_expr (offload_unregister, 4,
                      build_int_cstu (unsigned_type_node,
                                      GOMP_VERSION_PACK (GOMP_VERSION,
                                                         GOMP_VERSION_HSA)),
                      build_fold_addr_expr (hsa_libgomp_host_table),
                      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
                      build_fold_addr_expr (hsa_img_descriptor)),
     hsa_dtor_stmts);
  cgraph_build_static_cdtor ('D', *hsa_dtor_stmts, DEFAULT_INIT_PRIORITY);
}

/* Emit the brig module we have compiled to a section in the final assembly and
   also create a compile unit static constructor that will register the brig
   module with libgomp.
*/ - -void -hsa_output_brig (void) -{ - section *saved_section; - - if (!brig_initialized) - return; - - for (unsigned i = 0; i < function_call_linkage.length (); i++) - { - function_linkage_pair p = function_call_linkage[i]; - - BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl); - gcc_assert (*func_offset); - BrigOperandCodeRef *code_ref - = (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset)); - gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF); - code_ref->ref = lendian32 (*func_offset); - } - - /* Iterate all function declarations and if we meet a function that should - have module linkage and we are unable to emit HSAIL for the function, - then change the linkage to program linkage. Doing so, we will emit - a valid BRIG image. */ - if (hsa_failed_functions != NULL && emitted_declarations != NULL) - for (hash_map <tree, BrigDirectiveExecutable *>::iterator it - = emitted_declarations->begin (); - it != emitted_declarations->end (); - ++it) - { - if (hsa_failed_functions->contains ((*it).first)) - (*it).second->linkage = BRIG_LINKAGE_PROGRAM; - } - - saved_section = in_section; - - switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL)); - char tmp_name[64]; - ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1); - ASM_OUTPUT_LABEL (asm_out_file, tmp_name); - tree brig_id = get_identifier (tmp_name); - tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id, - char_type_node); - SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id); - TREE_ADDRESSABLE (brig_decl) = 1; - TREE_READONLY (brig_decl) = 1; - DECL_ARTIFICIAL (brig_decl) = 1; - DECL_IGNORED_P (brig_decl) = 1; - TREE_STATIC (brig_decl) = 1; - TREE_PUBLIC (brig_decl) = 0; - TREE_USED (brig_decl) = 1; - DECL_INITIAL (brig_decl) = brig_decl; - TREE_ASM_WRITTEN (brig_decl) = 1; - - BrigModuleHeader module_header; - memcpy (&module_header.identification, "HSA BRIG", - sizeof (module_header.identification)); - 
module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR); - module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR); - uint64_t section_index[3]; - - int data_padding, code_padding, operand_padding; - data_padding = HSA_SECTION_ALIGNMENT - - brig_data.total_size % HSA_SECTION_ALIGNMENT; - code_padding = HSA_SECTION_ALIGNMENT - - brig_code.total_size % HSA_SECTION_ALIGNMENT; - operand_padding = HSA_SECTION_ALIGNMENT - - brig_operand.total_size % HSA_SECTION_ALIGNMENT; - - uint64_t module_size = sizeof (module_header) - + sizeof (section_index) - + brig_data.total_size - + data_padding - + brig_code.total_size - + code_padding - + brig_operand.total_size - + operand_padding; - gcc_assert ((module_size % 16) == 0); - module_header.byteCount = lendian64 (module_size); - memset (&module_header.hash, 0, sizeof (module_header.hash)); - module_header.reserved = 0; - module_header.sectionCount = lendian32 (3); - module_header.sectionIndex = lendian64 (sizeof (module_header)); - assemble_string ((const char *) &module_header, sizeof (module_header)); - uint64_t off = sizeof (module_header) + sizeof (section_index); - section_index[0] = lendian64 (off); - off += brig_data.total_size + data_padding; - section_index[1] = lendian64 (off); - off += brig_code.total_size + code_padding; - section_index[2] = lendian64 (off); - assemble_string ((const char *) &section_index, sizeof (section_index)); - - char padding[HSA_SECTION_ALIGNMENT]; - memset (padding, 0, sizeof (padding)); - - brig_data.output (); - assemble_string (padding, data_padding); - brig_code.output (); - assemble_string (padding, code_padding); - brig_operand.output (); - assemble_string (padding, operand_padding); - - if (saved_section) - switch_to_section (saved_section); - - hsa_output_libgomp_mapping (brig_decl); - - hsa_free_decl_kernel_mapping (); - brig_release_data (); - hsa_deinit_compilation_unit_data (); - - delete emitted_declarations; - emitted_declarations = NULL; - delete function_offsets; - 
function_offsets = NULL; -} diff --git a/gcc/hsa-builtins.def b/gcc/hsa-builtins.def deleted file mode 100644 index dc60e42..0000000 --- a/gcc/hsa-builtins.def +++ /dev/null @@ -1,39 +0,0 @@ -/* This file contains the definitions and documentation for the - Offloading and Multi Processing builtins used in the GNU compiler. - Copyright (C) 2005-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -/* Before including this file, you should define a macro: - - DEF_HSA_BUILTIN (ENUM, NAME, TYPE, ATTRS) - - See builtins.def for details. */ - -/* The reason why they aren't in gcc/builtins.def is that the Fortran front end - doesn't source those. 
*/ - -DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKGROUPID, "hsa_workgroupid", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKITEMID, "hsa_workitemid", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_HSA_BUILTIN (BUILT_IN_HSA_WORKITEMABSID, "hsa_workitemabsid", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_HSA_BUILTIN (BUILT_IN_HSA_GRIDSIZE, "hsa_gridsize", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) -DEF_HSA_BUILTIN (BUILT_IN_HSA_CURRENTWORKGROUPSIZE, "hsa_currentworkgroupsize", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) diff --git a/gcc/hsa-common.c b/gcc/hsa-common.c deleted file mode 100644 index 4b06791..0000000 --- a/gcc/hsa-common.c +++ /dev/null @@ -1,996 +0,0 @@ -/* Implementation of commonly needed HSAIL related functions and methods. - Copyright (C) 2013-2020 Free Software Foundation, Inc. - Contributed by Martin Jambor <mjambor@suse.cz> and - Martin Liska <mliska@suse.cz>. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. 
*/ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "is-a.h" -#include "hash-set.h" -#include "hash-map.h" -#include "vec.h" -#include "tree.h" -#include "dumpfile.h" -#include "gimple-pretty-print.h" -#include "diagnostic-core.h" -#include "alloc-pool.h" -#include "cgraph.h" -#include "print-tree.h" -#include "stringpool.h" -#include "symbol-summary.h" -#include "hsa-common.h" -#include "internal-fn.h" -#include "ctype.h" -#include "builtins.h" -#include "stringpool.h" -#include "attribs.h" - -/* Structure containing intermediate HSA representation of the generated - function. */ -class hsa_function_representation *hsa_cfun; - -/* Element of the mapping vector between a host decl and an HSA kernel. */ - -struct GTY(()) hsa_decl_kernel_map_element -{ - /* The decl of the host function. */ - tree decl; - /* Name of the HSA kernel in BRIG. */ - char * GTY((skip)) name; - /* Size of OMP data, if the kernel contains a kernel dispatch. */ - unsigned omp_data_size; - /* True if the function is gridified kernel. */ - bool gridified_kernel_p; -}; - -/* Mapping between decls and corresponding HSA kernels in this compilation - unit. */ - -static GTY (()) vec<hsa_decl_kernel_map_element, va_gc> - *hsa_decl_kernel_mapping; - -/* Mapping between decls and corresponding HSA kernels - called by the function. */ -hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; - -/* Hash function to lookup a symbol for a decl. */ -hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; - -/* HSA summaries. */ -hsa_summary_t *hsa_summaries = NULL; - -/* HSA number of threads. */ -hsa_symbol *hsa_num_threads = NULL; - -/* HSA function that cannot be expanded to HSAIL. */ -hash_set <tree> *hsa_failed_functions = NULL; - -/* True if compilation unit-wide data are already allocated and initialized. */ -static bool compilation_unit_data_initialized; - -/* Return true if FNDECL represents an HSA-callable function. 
*/ - -bool -hsa_callable_function_p (tree fndecl) -{ - return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl)) - && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl))); -} - -/* Allocate HSA structures that are used when dealing with different - functions. */ - -void -hsa_init_compilation_unit_data (void) -{ - if (compilation_unit_data_initialized) - return; - - compilation_unit_data_initialized = true; - - hsa_global_variable_symbols = new hash_table <hsa_noop_symbol_hasher> (8); - hsa_failed_functions = new hash_set <tree> (); - hsa_emitted_internal_decls = new hash_table <hsa_internal_fn_hasher> (2); -} - -/* Free data structures that are used when dealing with different - functions. */ - -void -hsa_deinit_compilation_unit_data (void) -{ - gcc_assert (compilation_unit_data_initialized); - - delete hsa_failed_functions; - delete hsa_emitted_internal_decls; - - for (hash_table <hsa_noop_symbol_hasher>::iterator it - = hsa_global_variable_symbols->begin (); - it != hsa_global_variable_symbols->end (); - ++it) - { - hsa_symbol *sym = *it; - delete sym; - } - - delete hsa_global_variable_symbols; - - if (hsa_num_threads) - { - delete hsa_num_threads; - hsa_num_threads = NULL; - } - - compilation_unit_data_initialized = false; -} - -/* Return true if we are generating large HSA machine model. */ - -bool -hsa_machine_large_p (void) -{ - /* FIXME: I suppose this is technically wrong but should work for me now. */ - return (GET_MODE_BITSIZE (Pmode) == 64); -} - -/* Return the HSA profile we are using. */ - -bool -hsa_full_profile_p (void) -{ - return true; -} - -/* Return true if a register in operand number OPNUM of instruction - is an output. False if it is an input. 
*/ - -bool -hsa_insn_basic::op_output_p (unsigned opnum) -{ - switch (m_opcode) - { - case HSA_OPCODE_PHI: - case BRIG_OPCODE_CBR: - case BRIG_OPCODE_SBR: - case BRIG_OPCODE_ST: - case BRIG_OPCODE_SIGNALNORET: - case BRIG_OPCODE_DEBUGTRAP: - /* FIXME: There are probably missing cases here, double check. */ - return false; - case BRIG_OPCODE_EXPAND: - /* Example: expand_v4_b32_b128 (dest0, dest1, dest2, dest3), src0. */ - return opnum < operand_count () - 1; - default: - return opnum == 0; - } -} - -/* Return true if OPCODE is an floating-point bit instruction opcode. */ - -bool -hsa_opcode_floating_bit_insn_p (BrigOpcode16_t opcode) -{ - switch (opcode) - { - case BRIG_OPCODE_NEG: - case BRIG_OPCODE_ABS: - case BRIG_OPCODE_CLASS: - case BRIG_OPCODE_COPYSIGN: - return true; - default: - return false; - } -} - -/* Return the number of destination operands for this INSN. */ - -unsigned -hsa_insn_basic::input_count () -{ - switch (m_opcode) - { - default: - return 1; - - case BRIG_OPCODE_NOP: - return 0; - - case BRIG_OPCODE_EXPAND: - return 2; - - case BRIG_OPCODE_LD: - /* ld_v[234] not yet handled. */ - return 1; - - case BRIG_OPCODE_ST: - return 0; - - case BRIG_OPCODE_ATOMICNORET: - return 0; - - case BRIG_OPCODE_SIGNAL: - return 1; - - case BRIG_OPCODE_SIGNALNORET: - return 0; - - case BRIG_OPCODE_MEMFENCE: - return 0; - - case BRIG_OPCODE_RDIMAGE: - case BRIG_OPCODE_LDIMAGE: - case BRIG_OPCODE_STIMAGE: - case BRIG_OPCODE_QUERYIMAGE: - case BRIG_OPCODE_QUERYSAMPLER: - sorry ("HSA image ops not handled"); - return 0; - - case BRIG_OPCODE_CBR: - case BRIG_OPCODE_BR: - return 0; - - case BRIG_OPCODE_SBR: - return 0; /* ??? */ - - case BRIG_OPCODE_WAVEBARRIER: - return 0; /* ??? 
*/ - - case BRIG_OPCODE_BARRIER: - case BRIG_OPCODE_ARRIVEFBAR: - case BRIG_OPCODE_INITFBAR: - case BRIG_OPCODE_JOINFBAR: - case BRIG_OPCODE_LEAVEFBAR: - case BRIG_OPCODE_RELEASEFBAR: - case BRIG_OPCODE_WAITFBAR: - return 0; - - case BRIG_OPCODE_LDF: - return 1; - - case BRIG_OPCODE_ACTIVELANECOUNT: - case BRIG_OPCODE_ACTIVELANEID: - case BRIG_OPCODE_ACTIVELANEMASK: - case BRIG_OPCODE_ACTIVELANEPERMUTE: - return 1; /* ??? */ - - case BRIG_OPCODE_CALL: - case BRIG_OPCODE_SCALL: - case BRIG_OPCODE_ICALL: - return 0; - - case BRIG_OPCODE_RET: - return 0; - - case BRIG_OPCODE_ALLOCA: - return 1; - - case BRIG_OPCODE_CLEARDETECTEXCEPT: - return 0; - - case BRIG_OPCODE_SETDETECTEXCEPT: - return 0; - - case BRIG_OPCODE_PACKETCOMPLETIONSIG: - case BRIG_OPCODE_PACKETID: - case BRIG_OPCODE_CASQUEUEWRITEINDEX: - case BRIG_OPCODE_LDQUEUEREADINDEX: - case BRIG_OPCODE_LDQUEUEWRITEINDEX: - case BRIG_OPCODE_STQUEUEREADINDEX: - case BRIG_OPCODE_STQUEUEWRITEINDEX: - return 1; /* ??? */ - - case BRIG_OPCODE_ADDQUEUEWRITEINDEX: - return 1; - - case BRIG_OPCODE_DEBUGTRAP: - return 0; - - case BRIG_OPCODE_GROUPBASEPTR: - case BRIG_OPCODE_KERNARGBASEPTR: - return 1; /* ??? */ - - case HSA_OPCODE_ARG_BLOCK: - return 0; - - case BRIG_KIND_DIRECTIVE_COMMENT: - return 0; - } -} - -/* Return the number of source operands for this INSN. */ - -unsigned -hsa_insn_basic::num_used_ops () -{ - gcc_checking_assert (input_count () <= operand_count ()); - - return operand_count () - input_count (); -} - -/* Set alignment to VALUE. */ - -void -hsa_insn_mem::set_align (BrigAlignment8_t value) -{ - /* TODO: Perhaps remove this dump later on: */ - if (dump_file && (dump_flags & TDF_DETAILS) && value < m_align) - { - fprintf (dump_file, "Decreasing alignment to %u in instruction ", value); - dump_hsa_insn (dump_file, this); - } - m_align = value; -} - -/* Return size of HSA type T in bits. 
*/ - -unsigned -hsa_type_bit_size (BrigType16_t t) -{ - switch (t) - { - case BRIG_TYPE_B1: - return 1; - - case BRIG_TYPE_U8: - case BRIG_TYPE_S8: - case BRIG_TYPE_B8: - return 8; - - case BRIG_TYPE_U16: - case BRIG_TYPE_S16: - case BRIG_TYPE_B16: - case BRIG_TYPE_F16: - return 16; - - case BRIG_TYPE_U32: - case BRIG_TYPE_S32: - case BRIG_TYPE_B32: - case BRIG_TYPE_F32: - case BRIG_TYPE_U8X4: - case BRIG_TYPE_U16X2: - case BRIG_TYPE_S8X4: - case BRIG_TYPE_S16X2: - case BRIG_TYPE_F16X2: - return 32; - - case BRIG_TYPE_U64: - case BRIG_TYPE_S64: - case BRIG_TYPE_F64: - case BRIG_TYPE_B64: - case BRIG_TYPE_U8X8: - case BRIG_TYPE_U16X4: - case BRIG_TYPE_U32X2: - case BRIG_TYPE_S8X8: - case BRIG_TYPE_S16X4: - case BRIG_TYPE_S32X2: - case BRIG_TYPE_F16X4: - case BRIG_TYPE_F32X2: - - return 64; - - case BRIG_TYPE_B128: - case BRIG_TYPE_U8X16: - case BRIG_TYPE_U16X8: - case BRIG_TYPE_U32X4: - case BRIG_TYPE_U64X2: - case BRIG_TYPE_S8X16: - case BRIG_TYPE_S16X8: - case BRIG_TYPE_S32X4: - case BRIG_TYPE_S64X2: - case BRIG_TYPE_F16X8: - case BRIG_TYPE_F32X4: - case BRIG_TYPE_F64X2: - return 128; - - default: - gcc_assert (hsa_seen_error ()); - return t; - } -} - -/* Return BRIG bit-type with BITSIZE length. */ - -BrigType16_t -hsa_bittype_for_bitsize (unsigned bitsize) -{ - switch (bitsize) - { - case 1: - return BRIG_TYPE_B1; - case 8: - return BRIG_TYPE_B8; - case 16: - return BRIG_TYPE_B16; - case 32: - return BRIG_TYPE_B32; - case 64: - return BRIG_TYPE_B64; - case 128: - return BRIG_TYPE_B128; - default: - gcc_unreachable (); - } -} - -/* Return BRIG unsigned int type with BITSIZE length. */ - -BrigType16_t -hsa_uint_for_bitsize (unsigned bitsize) -{ - switch (bitsize) - { - case 8: - return BRIG_TYPE_U8; - case 16: - return BRIG_TYPE_U16; - case 32: - return BRIG_TYPE_U32; - case 64: - return BRIG_TYPE_U64; - default: - gcc_unreachable (); - } -} - -/* Return BRIG float type with BITSIZE length. 
*/ - -BrigType16_t -hsa_float_for_bitsize (unsigned bitsize) -{ - switch (bitsize) - { - case 16: - return BRIG_TYPE_F16; - case 32: - return BRIG_TYPE_F32; - case 64: - return BRIG_TYPE_F64; - default: - gcc_unreachable (); - } -} - -/* Return HSA bit-type with the same size as the type T. */ - -BrigType16_t -hsa_bittype_for_type (BrigType16_t t) -{ - return hsa_bittype_for_bitsize (hsa_type_bit_size (t)); -} - -/* Return HSA unsigned integer type with the same size as the type T. */ - -BrigType16_t -hsa_unsigned_type_for_type (BrigType16_t t) -{ - return hsa_uint_for_bitsize (hsa_type_bit_size (t)); -} - -/* Return true if TYPE is a packed HSA type. */ - -bool -hsa_type_packed_p (BrigType16_t type) -{ - return (type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE; -} - -/* Return true if and only if TYPE is a floating point number type. */ - -bool -hsa_type_float_p (BrigType16_t type) -{ - switch (type & BRIG_TYPE_BASE_MASK) - { - case BRIG_TYPE_F16: - case BRIG_TYPE_F32: - case BRIG_TYPE_F64: - return true; - default: - return false; - } -} - -/* Return true if and only if TYPE is an integer number type. */ - -bool -hsa_type_integer_p (BrigType16_t type) -{ - switch (type & BRIG_TYPE_BASE_MASK) - { - case BRIG_TYPE_U8: - case BRIG_TYPE_U16: - case BRIG_TYPE_U32: - case BRIG_TYPE_U64: - case BRIG_TYPE_S8: - case BRIG_TYPE_S16: - case BRIG_TYPE_S32: - case BRIG_TYPE_S64: - return true; - default: - return false; - } -} - -/* Return true if and only if TYPE is an bit-type. */ - -bool -hsa_btype_p (BrigType16_t type) -{ - switch (type & BRIG_TYPE_BASE_MASK) - { - case BRIG_TYPE_B8: - case BRIG_TYPE_B16: - case BRIG_TYPE_B32: - case BRIG_TYPE_B64: - case BRIG_TYPE_B128: - return true; - default: - return false; - } -} - - -/* Return HSA alignment encoding alignment to N bits. 
*/ - -BrigAlignment8_t -hsa_alignment_encoding (unsigned n) -{ - gcc_assert (n >= 8 && !(n & (n - 1))); - if (n >= 256) - return BRIG_ALIGNMENT_32; - - switch (n) - { - case 8: - return BRIG_ALIGNMENT_1; - case 16: - return BRIG_ALIGNMENT_2; - case 32: - return BRIG_ALIGNMENT_4; - case 64: - return BRIG_ALIGNMENT_8; - case 128: - return BRIG_ALIGNMENT_16; - default: - gcc_unreachable (); - } -} - -/* Return HSA alignment encoding alignment of T got - by get_object_alignment. */ - -BrigAlignment8_t -hsa_object_alignment (tree t) -{ - return hsa_alignment_encoding (get_object_alignment (t)); -} - -/* Return byte alignment for given BrigAlignment8_t value. */ - -unsigned -hsa_byte_alignment (BrigAlignment8_t alignment) -{ - gcc_assert (alignment != BRIG_ALIGNMENT_NONE); - - return 1 << (alignment - 1); -} - -/* Return natural alignment of HSA TYPE. */ - -BrigAlignment8_t -hsa_natural_alignment (BrigType16_t type) -{ - return hsa_alignment_encoding (hsa_type_bit_size (type & ~BRIG_TYPE_ARRAY)); -} - -/* Call the correct destructor of a HSA instruction. 
*/ - -void -hsa_destroy_insn (hsa_insn_basic *insn) -{ - if (hsa_insn_phi *phi = dyn_cast <hsa_insn_phi *> (insn)) - phi->~hsa_insn_phi (); - else if (hsa_insn_cbr *br = dyn_cast <hsa_insn_cbr *> (insn)) - br->~hsa_insn_cbr (); - else if (hsa_insn_cmp *cmp = dyn_cast <hsa_insn_cmp *> (insn)) - cmp->~hsa_insn_cmp (); - else if (hsa_insn_mem *mem = dyn_cast <hsa_insn_mem *> (insn)) - mem->~hsa_insn_mem (); - else if (hsa_insn_atomic *atomic = dyn_cast <hsa_insn_atomic *> (insn)) - atomic->~hsa_insn_atomic (); - else if (hsa_insn_seg *seg = dyn_cast <hsa_insn_seg *> (insn)) - seg->~hsa_insn_seg (); - else if (hsa_insn_call *call = dyn_cast <hsa_insn_call *> (insn)) - call->~hsa_insn_call (); - else if (hsa_insn_arg_block *block = dyn_cast <hsa_insn_arg_block *> (insn)) - block->~hsa_insn_arg_block (); - else if (hsa_insn_sbr *sbr = dyn_cast <hsa_insn_sbr *> (insn)) - sbr->~hsa_insn_sbr (); - else if (hsa_insn_br *br = dyn_cast <hsa_insn_br *> (insn)) - br->~hsa_insn_br (); - else if (hsa_insn_comment *comment = dyn_cast <hsa_insn_comment *> (insn)) - comment->~hsa_insn_comment (); - else - insn->~hsa_insn_basic (); -} - -/* Call the correct destructor of a HSA operand. */ - -void -hsa_destroy_operand (hsa_op_base *op) -{ - if (hsa_op_code_list *list = dyn_cast <hsa_op_code_list *> (op)) - list->~hsa_op_code_list (); - else if (hsa_op_operand_list *list = dyn_cast <hsa_op_operand_list *> (op)) - list->~hsa_op_operand_list (); - else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) - reg->~hsa_op_reg (); - else if (hsa_op_immed *immed = dyn_cast <hsa_op_immed *> (op)) - immed->~hsa_op_immed (); - else - op->~hsa_op_base (); -} - -/* Create a mapping between the original function DECL and kernel name NAME. 
*/ - -void -hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, - bool gridified_kernel_p) -{ - hsa_decl_kernel_map_element dkm; - dkm.decl = decl; - dkm.name = name; - dkm.omp_data_size = omp_data_size; - dkm.gridified_kernel_p = gridified_kernel_p; - vec_safe_push (hsa_decl_kernel_mapping, dkm); -} - -/* Return the number of kernel decl name mappings. */ - -unsigned -hsa_get_number_decl_kernel_mappings (void) -{ - return vec_safe_length (hsa_decl_kernel_mapping); -} - -/* Return the decl in the Ith kernel decl name mapping. */ - -tree -hsa_get_decl_kernel_mapping_decl (unsigned i) -{ - return (*hsa_decl_kernel_mapping)[i].decl; -} - -/* Return the name in the Ith kernel decl name mapping. */ - -char * -hsa_get_decl_kernel_mapping_name (unsigned i) -{ - return (*hsa_decl_kernel_mapping)[i].name; -} - -/* Return maximum OMP size for kernel decl name mapping. */ - -unsigned -hsa_get_decl_kernel_mapping_omp_size (unsigned i) -{ - return (*hsa_decl_kernel_mapping)[i].omp_data_size; -} - -/* Return if the function is gridified kernel in decl name mapping. */ - -bool -hsa_get_decl_kernel_mapping_gridified (unsigned i) -{ - return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; -} - -/* Free the mapping between original decls and kernel names. */ - -void -hsa_free_decl_kernel_mapping (void) -{ - if (hsa_decl_kernel_mapping == NULL) - return; - - for (unsigned i = 0; i < hsa_decl_kernel_mapping->length (); ++i) - free ((*hsa_decl_kernel_mapping)[i].name); - ggc_free (hsa_decl_kernel_mapping); -} - -/* Add new kernel dependency. 
*/ - -void -hsa_add_kernel_dependency (tree caller, const char *called_function) -{ - if (hsa_decl_kernel_dependencies == NULL) - hsa_decl_kernel_dependencies = new hash_map<tree, vec<const char *> *> (); - - vec <const char *> *s = NULL; - vec <const char *> **slot = hsa_decl_kernel_dependencies->get (caller); - if (slot == NULL) - { - s = new vec <const char *> (); - hsa_decl_kernel_dependencies->put (caller, s); - } - else - s = *slot; - - s->safe_push (called_function); -} - -/* Expansion to HSA needs a few gc roots to hold types, constructors etc. In - order to minimize the number of GTY roots, we'll root them all in the - following array. The individual elements should only be accessed by the - very simple getters (of a pointer-to-tree) below. */ - -static GTY(()) tree hsa_tree_gt_roots[3]; - -tree * -hsa_get_ctor_statements (void) -{ - return &hsa_tree_gt_roots[0]; -} - -tree * -hsa_get_dtor_statements (void) -{ - return &hsa_tree_gt_roots[1]; -} - -tree * -hsa_get_kernel_dispatch_type (void) -{ - return &hsa_tree_gt_roots[2]; -} - -/* Modify the name P in-place so that it is a valid HSA identifier. */ - -void -hsa_sanitize_name (char *p) -{ - for (; *p; p++) - if (*p == '.' || *p == '-') - *p = '_'; -} - -/* Clone the name P, set trailing ampersand and sanitize the name. */ - -char * -hsa_brig_function_name (const char *p) -{ - unsigned len = strlen (p); - char *buf = XNEWVEC (char, len + 2); - - buf[0] = '&'; - buf[len + 1] = '\0'; - memcpy (buf + 1, p, len); - - hsa_sanitize_name (buf); - return buf; -} - -/* Add a flatten attribute and disable vectorization for gpu implementation - function decl GDECL. 
*/ - -void hsa_summary_t::process_gpu_implementation_attributes (tree gdecl) -{ - DECL_ATTRIBUTES (gdecl) - = tree_cons (get_identifier ("flatten"), NULL_TREE, - DECL_ATTRIBUTES (gdecl)); - - tree fn_opts = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl); - if (fn_opts == NULL_TREE) - fn_opts = optimization_default_node; - fn_opts = copy_node (fn_opts); - TREE_OPTIMIZATION (fn_opts)->x_flag_tree_loop_vectorize = false; - TREE_OPTIMIZATION (fn_opts)->x_flag_tree_slp_vectorize = false; - DECL_FUNCTION_SPECIFIC_OPTIMIZATION (gdecl) = fn_opts; -} - -void -hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, - hsa_function_kind kind, bool gridified_kernel_p) -{ - hsa_function_summary *gpu_summary = get_create (gpu); - hsa_function_summary *host_summary = get_create (host); - - gpu_summary->m_kind = kind; - host_summary->m_kind = kind; - - gpu_summary->m_gpu_implementation_p = true; - host_summary->m_gpu_implementation_p = false; - - gpu_summary->m_gridified_kernel_p = gridified_kernel_p; - host_summary->m_gridified_kernel_p = gridified_kernel_p; - - gpu_summary->m_bound_function = host; - host_summary->m_bound_function = gpu; - - process_gpu_implementation_attributes (gpu->decl); - - /* Create reference between a kernel and a corresponding host implementation - to quarantee LTO streaming to a same LTRANS. */ - if (kind == HSA_KERNEL) - gpu->create_reference (host, IPA_REF_ADDR); -} - -/* Add a HOST function to HSA summaries. */ - -void -hsa_register_kernel (cgraph_node *host) -{ - if (hsa_summaries == NULL) - hsa_summaries = new hsa_summary_t (symtab); - hsa_function_summary *s = hsa_summaries->get_create (host); - s->m_kind = HSA_KERNEL; -} - -/* Add a pair of functions to HSA summaries. GPU is an HSA implementation of - a HOST function. 
*/ - -void -hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) -{ - if (hsa_summaries == NULL) - hsa_summaries = new hsa_summary_t (symtab); - hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); -} - -/* Return true if expansion of the current HSA function has already failed. */ - -bool -hsa_seen_error (void) -{ - return hsa_cfun->m_seen_error; -} - -/* Mark current HSA function as failed. */ - -void -hsa_fail_cfun (void) -{ - hsa_failed_functions->add (hsa_cfun->m_decl); - hsa_cfun->m_seen_error = true; -} - -char * -hsa_internal_fn::name () -{ - char *name = xstrdup (internal_fn_name (m_fn)); - for (char *ptr = name; *ptr; ptr++) - *ptr = TOLOWER (*ptr); - - const char *suffix = NULL; - if (m_type_bit_size == 32) - suffix = "f"; - - if (suffix) - { - char *name2 = concat (name, suffix, NULL); - free (name); - name = name2; - } - - hsa_sanitize_name (name); - return name; -} - -unsigned -hsa_internal_fn::get_arity () -{ - switch (m_fn) - { - case IFN_ACOS: - case IFN_ASIN: - case IFN_ATAN: - case IFN_COS: - case IFN_EXP: - case IFN_EXP10: - case IFN_EXP2: - case IFN_EXPM1: - case IFN_LOG: - case IFN_LOG10: - case IFN_LOG1P: - case IFN_LOG2: - case IFN_LOGB: - case IFN_SIGNIFICAND: - case IFN_SIN: - case IFN_SQRT: - case IFN_TAN: - case IFN_CEIL: - case IFN_FLOOR: - case IFN_NEARBYINT: - case IFN_RINT: - case IFN_ROUND: - case IFN_TRUNC: - return 1; - case IFN_ATAN2: - case IFN_COPYSIGN: - case IFN_FMOD: - case IFN_POW: - case IFN_REMAINDER: - case IFN_SCALB: - case IFN_LDEXP: - return 2; - case IFN_CLRSB: - case IFN_CLZ: - case IFN_CTZ: - case IFN_FFS: - case IFN_PARITY: - case IFN_POPCOUNT: - default: - /* As we produce sorry message for unknown internal functions, - reaching this label is definitely a bug. 
*/ - gcc_unreachable (); - } -} - -BrigType16_t -hsa_internal_fn::get_argument_type (int n) -{ - switch (m_fn) - { - case IFN_ACOS: - case IFN_ASIN: - case IFN_ATAN: - case IFN_COS: - case IFN_EXP: - case IFN_EXP10: - case IFN_EXP2: - case IFN_EXPM1: - case IFN_LOG: - case IFN_LOG10: - case IFN_LOG1P: - case IFN_LOG2: - case IFN_LOGB: - case IFN_SIGNIFICAND: - case IFN_SIN: - case IFN_SQRT: - case IFN_TAN: - case IFN_CEIL: - case IFN_FLOOR: - case IFN_NEARBYINT: - case IFN_RINT: - case IFN_ROUND: - case IFN_TRUNC: - case IFN_ATAN2: - case IFN_COPYSIGN: - case IFN_FMOD: - case IFN_POW: - case IFN_REMAINDER: - case IFN_SCALB: - return hsa_float_for_bitsize (m_type_bit_size); - case IFN_LDEXP: - { - if (n == -1 || n == 0) - return hsa_float_for_bitsize (m_type_bit_size); - else - return BRIG_TYPE_S32; - } - default: - /* As we produce sorry message for unknown internal functions, - reaching this label is definitely a bug. */ - gcc_unreachable (); - } -} - -#include "gt-hsa-common.h" diff --git a/gcc/hsa-common.h b/gcc/hsa-common.h deleted file mode 100644 index ffeaaba..0000000 --- a/gcc/hsa-common.h +++ /dev/null @@ -1,1419 +0,0 @@ -/* HSAIL and BRIG related macros and definitions. - Copyright (C) 2013-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. 
*/ - -#ifndef HSA_H -#define HSA_H - -#include "hsa-brig-format.h" -#include "is-a.h" -#include "predict.h" -#include "tree.h" -#include "vec.h" -#include "hash-table.h" -#include "basic-block.h" -#include "bitmap.h" - - -/* Return true if the compiler should produce HSAIL. */ - -static inline bool -hsa_gen_requested_p (void) -{ -#ifndef ENABLE_HSA - return false; -#endif - return !flag_disable_hsa; -} - -/* Standard warning message if we failed to generate HSAIL for a function. */ - -#define HSA_SORRY_MSG "could not emit HSAIL for the function" - -class hsa_op_immed; -class hsa_op_cst_list; -class hsa_insn_basic; -class hsa_op_address; -class hsa_op_reg; -class hsa_bb; - -/* Class representing an input argument, output argument (result) or a - variable, that will eventually end up being a symbol directive. */ - -class hsa_symbol -{ -public: - /* Constructor. */ - hsa_symbol (BrigType16_t type, BrigSegment8_t segment, - BrigLinkage8_t linkage, bool global_scope_p = false, - BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC, - BrigAlignment8_t align = BRIG_ALIGNMENT_8); - - /* Return total size of the symbol. */ - unsigned HOST_WIDE_INT total_byte_size (); - - /* Fill in those values into the symbol according to DECL, which are - determined independently from whether it is parameter, result, - or a variable, local or global. */ - void fillup_for_decl (tree decl); - - /* Pointer to the original tree, which is PARM_DECL for input parameters and - RESULT_DECL for the output parameters. Also can be CONST_DECL for Fortran - constants which need to be put into readonly segment. */ - tree m_decl; - - /* Name of the symbol, that will be written into output and dumps. Can be - NULL, see name_number below. */ - const char *m_name; - - /* If name is NULL, artificial name will be formed from the segment name and - this number. */ - int m_name_number; - - /* Once written, this is the offset of the associated symbol directive. Zero - means the symbol has not been written yet. 
*/ - unsigned m_directive_offset; - - /* HSA type of the parameter. */ - BrigType16_t m_type; - - /* The HSA segment this will eventually end up in. */ - BrigSegment8_t m_segment; - - /* The HSA kind of linkage. */ - BrigLinkage8_t m_linkage; - - /* Array dimension, if non-zero. */ - unsigned HOST_WIDE_INT m_dim; - - /* Constant value, used for string constants. */ - hsa_op_immed *m_cst_value; - - /* Is in global scope. */ - bool m_global_scope_p; - - /* True if an error has been seen for the symbol. */ - bool m_seen_error; - - /* Symbol allocation. */ - BrigAllocation m_allocation; - - /* Flag used for global variables if a variable is already emitted or not. */ - bool m_emitted_to_brig; - - /* Alignment of the symbol. */ - BrigAlignment8_t m_align; - -private: - /* Default constructor. */ - hsa_symbol (); -}; - -/* Abstract class for HSA instruction operands. */ - -class hsa_op_base -{ -public: - /* Next operand scheduled to be written when writing BRIG operand - section. */ - hsa_op_base *m_next; - - /* Offset to which the associated operand structure will be written. Zero if - yet not scheduled for writing. */ - unsigned m_brig_op_offset; - - /* The type of a particular operand. */ - BrigKind16_t m_kind; - -protected: - hsa_op_base (BrigKind16_t k); -private: - /* Make the default constructor inaccessible. */ - hsa_op_base () {} -}; - -/* Common abstract ancestor for operands which have a type. */ - -class hsa_op_with_type : public hsa_op_base -{ -public: - /* The type. */ - BrigType16_t m_type; - - /* Convert an operand to a destination type DTYPE and attach insns - to HBB if needed. */ - hsa_op_with_type *get_in_type (BrigType16_t dtype, hsa_bb *hbb); - /* If this operand has integer type smaller than 32 bits, extend it to 32 - bits, adding instructions to HBB if needed. */ - hsa_op_with_type *extend_int_to_32bit (hsa_bb *hbb); - -protected: - hsa_op_with_type (BrigKind16_t k, BrigType16_t t); -private: - /* Make the default constructor inaccessible. 
*/ - hsa_op_with_type () : hsa_op_base (BRIG_KIND_NONE) {} -}; - -/* An immediate HSA operand. */ - -class hsa_op_immed : public hsa_op_with_type -{ -public: - hsa_op_immed (tree tree_val, bool min32int = true); - hsa_op_immed (HOST_WIDE_INT int_value, BrigType16_t type); - void *operator new (size_t); - ~hsa_op_immed (); - void set_type (BrigKind16_t t); - - /* Function returns pointer to a buffer that contains binary representation - of the immeadiate value. The buffer has length of BRIG_SIZE and - a caller is responsible for deallocation of the buffer. */ - char *emit_to_buffer (unsigned *brig_size); - - /* Value as represented by middle end. */ - tree m_tree_value; - - /* Integer value representation. */ - HOST_WIDE_INT m_int_value; - -private: - /* Make the default constructor inaccessible. */ - hsa_op_immed (); - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} -}; - -/* Report whether or not P is an immediate operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_immed *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_CONSTANT_BYTES; -} - -/* Likewise, but for a more specified base. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_immed *>::test (hsa_op_with_type *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_CONSTANT_BYTES; -} - - -/* HSA register operand. */ - -class hsa_op_reg : public hsa_op_with_type -{ - friend class hsa_insn_basic; - friend class hsa_insn_phi; -public: - hsa_op_reg (BrigType16_t t); - void *operator new (size_t); - - /* Verify register operand. */ - void verify_ssa (); - - /* If NON-NULL, gimple SSA that we come from. NULL if none. */ - tree m_gimple_ssa; - - /* Defining instruction while still in the SSA. */ - hsa_insn_basic *m_def_insn; - - /* If the register allocator decides to spill the register, this is the - appropriate spill symbol. 
*/ - hsa_symbol *m_spill_sym; - - /* Number of this register structure in the order in which they were - allocated. */ - int m_order; - int m_lr_begin, m_lr_end; - - /* Zero if the register is not yet allocated. After, allocation, this must - be 'c', 's', 'd' or 'q'. */ - char m_reg_class; - /* If allocated, the number of the HW register (within its HSA register - class). */ - char m_hard_num; - -private: - /* Make the default constructor inaccessible. */ - hsa_op_reg () : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE) {} - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} - /* Set definition where the register is defined. */ - void set_definition (hsa_insn_basic *insn); - /* Uses of the value while still in SSA. */ - auto_vec <hsa_insn_basic *> m_uses; -}; - -/* Report whether or not P is a register operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_reg *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_REGISTER; -} - -/* Report whether or not P is a register operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_reg *>::test (hsa_op_with_type *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_REGISTER; -} - -/* An address HSA operand. */ - -class hsa_op_address : public hsa_op_base -{ -public: - /* set up a new address operand consisting of base symbol SYM, register R and - immediate OFFSET. If the machine model is not large and offset is 64 bit, - the upper, 32 bits have to be zero. */ - hsa_op_address (hsa_symbol *sym, hsa_op_reg *reg, - HOST_WIDE_INT offset = 0); - - void *operator new (size_t); - - /* Set up a new address operand consisting of base symbol SYM and - immediate OFFSET. If the machine model is not large and offset is 64 bit, - the upper, 32 bits have to be zero. */ - hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset = 0); - - /* Set up a new address operand consisting of register R and - immediate OFFSET. 
If the machine model is not large and offset is 64 bit, - the upper, 32 bits have to be zero. */ - hsa_op_address (hsa_op_reg *reg, HOST_WIDE_INT offset = 0); - - /* Symbol base of the address. Can be NULL if there is none. */ - hsa_symbol *m_symbol; - - /* Register offset. Can be NULL if there is none. */ - hsa_op_reg *m_reg; - - /* Immediate byte offset. */ - HOST_WIDE_INT m_imm_offset; - -private: - /* Make the default constructor inaccessible. */ - hsa_op_address () : hsa_op_base (BRIG_KIND_NONE) {} - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} -}; - -/* Report whether or not P is an address operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_address *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_ADDRESS; -} - -/* A reference to code HSA operand. It can be either reference - to a start of a BB or a start of a function. */ - -class hsa_op_code_ref : public hsa_op_base -{ -public: - hsa_op_code_ref (); - - /* Offset in the code section that this refers to. */ - unsigned m_directive_offset; -}; - -/* Report whether or not P is a code reference operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_code_ref *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_CODE_REF; -} - -/* Code list HSA operand. */ - -class hsa_op_code_list: public hsa_op_base -{ -public: - hsa_op_code_list (unsigned elements); - void *operator new (size_t); - - /* Offset to variable-sized array in hsa_data section, where - are offsets to entries in the hsa_code section. */ - auto_vec<unsigned> m_offsets; -private: - /* Make the default constructor inaccessible. */ - hsa_op_code_list () : hsa_op_base (BRIG_KIND_NONE) {} - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} -}; - -/* Report whether or not P is a code list operand. 
*/ - -template <> -template <> -inline bool -is_a_helper <hsa_op_code_list *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_CODE_LIST; -} - -/* Operand list HSA operand. */ - -class hsa_op_operand_list: public hsa_op_base -{ -public: - hsa_op_operand_list (unsigned elements); - ~hsa_op_operand_list (); - void *operator new (size_t); - - /* Offset to variable-sized array in hsa_data section, where - are offsets to entries in the hsa_code section. */ - auto_vec<unsigned> m_offsets; -private: - /* Make the default constructor inaccessible. */ - hsa_op_operand_list () : hsa_op_base (BRIG_KIND_NONE) {} - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} -}; - -/* Report whether or not P is a code list operand. */ - -template <> -template <> -inline bool -is_a_helper <hsa_op_operand_list *>::test (hsa_op_base *p) -{ - return p->m_kind == BRIG_KIND_OPERAND_OPERAND_LIST; -} - -/* Opcodes of instructions that are not part of HSA but that we use to - represent it nevertheless. */ - -#define HSA_OPCODE_PHI (-1) -#define HSA_OPCODE_ARG_BLOCK (-2) - -/* The number of operand pointers we can directly in an instruction. */ -#define HSA_BRIG_INT_STORAGE_OPERANDS 5 - -/* Class representing an HSA instruction. Unlike typical ancestors for - specialized classes, this one is also directly used for all instructions - that are then represented as BrigInstBasic. 
*/ - -class hsa_insn_basic -{ -public: - hsa_insn_basic (unsigned nops, int opc); - hsa_insn_basic (unsigned nops, int opc, BrigType16_t t, - hsa_op_base *arg0 = NULL, - hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, - hsa_op_base *arg3 = NULL); - - void *operator new (size_t); - void set_op (int index, hsa_op_base *op); - hsa_op_base *get_op (int index); - hsa_op_base **get_op_addr (int index); - unsigned int operand_count (); - void verify (); - unsigned input_count (); - unsigned num_used_ops (); - void set_output_in_type (hsa_op_reg *dest, unsigned op_index, hsa_bb *hbb); - bool op_output_p (unsigned opnum); - - /* The previous and next instruction in the basic block. */ - hsa_insn_basic *m_prev, *m_next; - - /* Basic block this instruction belongs to. */ - basic_block m_bb; - - /* Operand code distinguishing different types of instructions. Eventually - these should only be BRIG_INST_* values from the BrigOpcode16_t range but - initially we use negative values for PHI nodes and such. */ - int m_opcode; - - /* Linearized number assigned to the instruction by HSA RA. */ - int m_number; - - /* Type of the destination of the operations. */ - BrigType16_t m_type; - - /* BRIG offset of the instruction in code section. */ - unsigned int m_brig_offset; - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_basic () {} - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} - /* The individual operands. All instructions but PHI nodes have five or - fewer instructions and so will fit the internal storage. */ - /* TODO: Vast majority of instructions have three or fewer operands, so we - may actually try reducing it. */ - auto_vec<hsa_op_base *, HSA_BRIG_INT_STORAGE_OPERANDS> m_operands; -}; - -/* Class representing a PHI node of the SSA form of HSA virtual - registers. 
*/ - -class hsa_insn_phi : public hsa_insn_basic -{ -public: - hsa_insn_phi (unsigned nops, hsa_op_reg *dst); - - /* Destination. */ - hsa_op_reg *m_dest; - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_phi () : hsa_insn_basic (1, HSA_OPCODE_PHI) {} -}; - -/* Report whether or not P is a PHI node. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_phi *>::test (hsa_insn_basic *p) -{ - return p->m_opcode == HSA_OPCODE_PHI; -} - -/* HSA instruction for */ -class hsa_insn_br : public hsa_insn_basic -{ -public: - hsa_insn_br (unsigned nops, int opc, BrigType16_t t, BrigWidth8_t width, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); - - /* Number of work-items affected in the same way by the instruction. */ - BrigWidth8_t m_width; - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_br () : hsa_insn_basic (0, BRIG_OPCODE_BR) {} -}; - -/* Return true if P is a branching/synchronization instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_br *>::test (hsa_insn_basic *p) -{ - return p->m_opcode == BRIG_OPCODE_BARRIER - || p->m_opcode == BRIG_OPCODE_BR; -} - -/* HSA instruction for conditional branches. Structurally the same as - hsa_insn_br but we represent it specially because of inherent control - flow it represents. */ - -class hsa_insn_cbr : public hsa_insn_br -{ -public: - hsa_insn_cbr (hsa_op_reg *ctrl); - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_cbr () : hsa_insn_br (0, BRIG_OPCODE_CBR, BRIG_TYPE_B1, - BRIG_WIDTH_1) {} -}; - -/* Report whether P is a contitional branching instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_cbr *>::test (hsa_insn_basic *p) -{ - return p->m_opcode == BRIG_OPCODE_CBR; -} - -/* HSA instruction for switch branches. 
*/ - -class hsa_insn_sbr : public hsa_insn_basic -{ -public: - hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count); - - /* Default destructor. */ - ~hsa_insn_sbr (); - - void replace_all_labels (basic_block old_bb, basic_block new_bb); - - /* Width as described in HSA documentation. */ - BrigWidth8_t m_width; - - /* Jump table. */ - vec <basic_block> m_jump_table; - - /* Code list for label references. */ - hsa_op_code_list *m_label_code_list; - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_sbr () : hsa_insn_basic (1, BRIG_OPCODE_SBR) {} -}; - -/* Report whether P is a switch branching instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_sbr *>::test (hsa_insn_basic *p) -{ - return p->m_opcode == BRIG_OPCODE_SBR; -} - -/* HSA instruction for comparisons. */ - -class hsa_insn_cmp : public hsa_insn_basic -{ -public: - hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL); - - /* Source type should be derived from operand types. */ - - /* The comparison operation. */ - BrigCompareOperation8_t m_compare; - - /* TODO: Modifiers and packing control are missing but so are everywhere - else. */ -private: - /* Make the default constructor inaccessible. */ - hsa_insn_cmp () : hsa_insn_basic (1, BRIG_OPCODE_CMP) {} -}; - -/* Report whether or not P is a comparison instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_cmp *>::test (hsa_insn_basic *p) -{ - return p->m_opcode == BRIG_OPCODE_CMP; -} - -/* HSA instruction for memory operations. */ - -class hsa_insn_mem : public hsa_insn_basic -{ -public: - hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0, hsa_op_base *arg1); - - /* Set alignment to VALUE. */ - - void set_align (BrigAlignment8_t value); - - /* The segment is of the memory access is either the segment of the symbol in - the address operand or flat address is there is no symbol there. 
*/ - - /* Required alignment of the memory operation. */ - BrigAlignment8_t m_align; - - /* HSA equiv class, basically an alias set number. */ - uint8_t m_equiv_class; - - /* TODO: Add width modifier, perhaps also other things. */ -protected: - hsa_insn_mem (unsigned nops, int opc, BrigType16_t t, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_mem () : hsa_insn_basic (1, BRIG_OPCODE_LD) {} -}; - -/* Report whether or not P is a memory instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_mem *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_LD - || p->m_opcode == BRIG_OPCODE_ST); -} - -/* HSA instruction for atomic operations. */ - -class hsa_insn_atomic : public hsa_insn_mem -{ -public: - hsa_insn_atomic (int nops, int opc, enum BrigAtomicOperation aop, - BrigType16_t t, BrigMemoryOrder memorder, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); - - /* The operation itself. */ - enum BrigAtomicOperation m_atomicop; - - /* Things like acquire/release/aligned. */ - enum BrigMemoryOrder m_memoryorder; - - /* Scope of the atomic operation. */ - enum BrigMemoryScope m_memoryscope; - -private: - /* Make the default constructor inaccessible. */ - hsa_insn_atomic () : hsa_insn_mem (1, BRIG_KIND_NONE, BRIG_TYPE_NONE) {} -}; - -/* Report whether or not P is an atomic instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_atomic *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_ATOMIC - || p->m_opcode == BRIG_OPCODE_ATOMICNORET); -} - -/* HSA instruction for signal operations. 
*/ - -class hsa_insn_signal : public hsa_insn_basic -{ -public: - hsa_insn_signal (int nops, int opc, enum BrigAtomicOperation sop, - BrigType16_t t, BrigMemoryOrder memorder, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); - - /* Things like acquire/release/aligned. */ - enum BrigMemoryOrder m_memory_order; - - /* The operation itself. */ - enum BrigAtomicOperation m_signalop; -}; - -/* Report whether or not P is a signal instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_signal *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_SIGNAL - || p->m_opcode == BRIG_OPCODE_SIGNALNORET); -} - -/* HSA instruction to convert between flat addressing and segments. */ - -class hsa_insn_seg : public hsa_insn_basic -{ -public: - hsa_insn_seg (int opc, BrigType16_t destt, BrigType16_t srct, - BrigSegment8_t seg, hsa_op_base *arg0, hsa_op_base *arg1); - - /* Source type. Depends on the source addressing/segment. */ - BrigType16_t m_src_type; - /* The segment we are converting from or to. */ - BrigSegment8_t m_segment; -private: - /* Make the default constructor inaccessible. */ - hsa_insn_seg () : hsa_insn_basic (1, BRIG_OPCODE_STOF) {} -}; - -/* Report whether or not P is a segment conversion instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_seg *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_STOF - || p->m_opcode == BRIG_OPCODE_FTOS); -} - -/* Class for internal functions for purpose of HSA emission. */ - -class hsa_internal_fn -{ -public: - hsa_internal_fn (enum internal_fn fn, unsigned type_bit_size): - m_fn (fn), m_type_bit_size (type_bit_size), m_offset (0) {} - - hsa_internal_fn (const hsa_internal_fn *f): - m_fn (f->m_fn), m_type_bit_size (f->m_type_bit_size), - m_offset (f->m_offset) {} - - /* Return arity of the internal function. 
*/ - unsigned get_arity (); - - /* Return BRIG type of N-th argument, if -1 is passed, return value type - is received. */ - BrigType16_t get_argument_type (int n); - - /* Return function name. The memory must be released by a caller. */ - char *name (); - - /* Internal function. */ - enum internal_fn m_fn; - - /* Bit width of return type. */ - unsigned m_type_bit_size; - - /* BRIG offset of declaration of the function. */ - BrigCodeOffset32_t m_offset; -}; - -/* HSA instruction for function call. */ - -class hsa_insn_call : public hsa_insn_basic -{ -public: - hsa_insn_call (tree callee); - hsa_insn_call (hsa_internal_fn *fn); - - /* Default destructor. */ - ~hsa_insn_call (); - - /* Called function. */ - tree m_called_function; - - /* Called internal function. */ - hsa_internal_fn *m_called_internal_fn; - - /* Input formal arguments. */ - auto_vec <hsa_symbol *> m_input_args; - - /* Input arguments store instructions. */ - auto_vec <hsa_insn_mem *> m_input_arg_insns; - - /* Output argument, can be NULL for void functions. */ - hsa_symbol *m_output_arg; - - /* Called function code reference. */ - hsa_op_code_ref m_func; - - /* Code list for arguments of the function. */ - hsa_op_code_list *m_args_code_list; - - /* Code list for result of the function. */ - hsa_op_code_list *m_result_code_list; -private: - /* Make the default constructor inaccessible. */ - hsa_insn_call () : hsa_insn_basic (0, BRIG_OPCODE_CALL) {} -}; - -/* Report whether or not P is a call instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_call *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_CALL); -} - -/* HSA call instruction block encapsulates definition of arguments, - result type, corresponding loads and a possible store. - Moreover, it contains a single call instruction. - Emission of the instruction will produce multiple - HSAIL instructions. 
*/ - -class hsa_insn_arg_block : public hsa_insn_basic -{ -public: - hsa_insn_arg_block (BrigKind brig_kind, hsa_insn_call * call); - - /* Kind of argument block. */ - BrigKind m_kind; - - /* Call instruction. */ - hsa_insn_call *m_call_insn; -}; - -/* Report whether or not P is a call block instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_arg_block *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == HSA_OPCODE_ARG_BLOCK); -} - -/* HSA comment instruction. */ - -class hsa_insn_comment: public hsa_insn_basic -{ -public: - /* Constructor of class representing the comment in HSAIL. */ - hsa_insn_comment (const char *s); - - /* Default destructor. */ - ~hsa_insn_comment (); - - char *m_comment; -}; - -/* Report whether or not P is a call block instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_comment *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_KIND_DIRECTIVE_COMMENT); -} - -/* HSA queue instruction. */ - -class hsa_insn_queue: public hsa_insn_basic -{ -public: - hsa_insn_queue (int nops, int opcode, BrigSegment segment, - BrigMemoryOrder memory_order, - hsa_op_base *arg0 = NULL, hsa_op_base *arg1 = NULL, - hsa_op_base *arg2 = NULL, hsa_op_base *arg3 = NULL); - - /* Destructor. */ - ~hsa_insn_queue (); - - /* Segment used to refer to the queue. Must be global or flat. */ - BrigSegment m_segment; - /* Memory order used to specify synchronization. */ - BrigMemoryOrder m_memory_order; -}; - -/* Report whether or not P is a queue instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_queue *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_ADDQUEUEWRITEINDEX - || p->m_opcode == BRIG_OPCODE_CASQUEUEWRITEINDEX - || p->m_opcode == BRIG_OPCODE_LDQUEUEREADINDEX - || p->m_opcode == BRIG_OPCODE_LDQUEUEWRITEINDEX - || p->m_opcode == BRIG_OPCODE_STQUEUEREADINDEX - || p->m_opcode == BRIG_OPCODE_STQUEUEWRITEINDEX); -} - -/* HSA source type instruction. 
*/ - -class hsa_insn_srctype: public hsa_insn_basic -{ -public: - hsa_insn_srctype (int nops, BrigOpcode opcode, BrigType16_t destt, - BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2); - - /* Source type. */ - BrigType16_t m_source_type; - - /* Destructor. */ - ~hsa_insn_srctype (); -}; - -/* Report whether or not P is a source type instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_srctype *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_POPCOUNT - || p->m_opcode == BRIG_OPCODE_FIRSTBIT - || p->m_opcode == BRIG_OPCODE_LASTBIT); -} - -/* HSA packed instruction. */ - -class hsa_insn_packed : public hsa_insn_srctype -{ -public: - hsa_insn_packed (int nops, BrigOpcode opcode, BrigType16_t destt, - BrigType16_t srct, hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2); - - /* Operand list for an operand of the instruction. */ - hsa_op_operand_list *m_operand_list; - - /* Destructor. */ - ~hsa_insn_packed (); -}; - -/* Report whether or not P is a combine instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_packed *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_COMBINE - || p->m_opcode == BRIG_OPCODE_EXPAND); -} - -/* HSA convert instruction. */ - -class hsa_insn_cvt: public hsa_insn_basic -{ -public: - hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src); -}; - -/* Report whether or not P is a convert instruction. */ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_cvt *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_CVT); -} - -/* HSA alloca instruction. */ - -class hsa_insn_alloca: public hsa_insn_basic -{ -public: - hsa_insn_alloca (hsa_op_with_type *dest, hsa_op_with_type *size, - unsigned alignment = 0); - - /* Required alignment of the allocation. */ - BrigAlignment8_t m_align; -}; - -/* Report whether or not P is an alloca instruction. 
*/ - -template <> -template <> -inline bool -is_a_helper <hsa_insn_alloca *>::test (hsa_insn_basic *p) -{ - return (p->m_opcode == BRIG_OPCODE_ALLOCA); -} - -/* Basic block of HSA instructions. */ - -class hsa_bb -{ -public: - hsa_bb (basic_block cfg_bb); - hsa_bb (basic_block cfg_bb, int idx); - - /* Append an instruction INSN into the basic block. */ - void append_insn (hsa_insn_basic *insn); - - /* Add a PHI instruction. */ - void append_phi (hsa_insn_phi *phi); - - /* The real CFG BB that this HBB belongs to. */ - basic_block m_bb; - - /* The operand that refers to the label to this BB. */ - hsa_op_code_ref m_label_ref; - - /* The first and last instruction. */ - hsa_insn_basic *m_first_insn, *m_last_insn; - /* The first and last phi node. */ - hsa_insn_phi *m_first_phi, *m_last_phi; - - /* Just a number to construct names from. */ - int m_index; - - auto_bitmap m_liveout, m_livein; -private: - /* Make the default constructor inaccessible. */ - hsa_bb (); - /* All objects are deallocated by destroying their pool, so make delete - inaccessible too. */ - void operator delete (void *) {} -}; - -/* Return the corresponding HSA basic block structure for the given control - flow basic_block BB. */ - -static inline hsa_bb * -hsa_bb_for_bb (basic_block bb) -{ - return (class hsa_bb *) bb->aux; -} - -/* Class for hashing local hsa_symbols. */ - -struct hsa_noop_symbol_hasher : nofree_ptr_hash <hsa_symbol> -{ - static inline hashval_t hash (const value_type); - static inline bool equal (const value_type, const compare_type); -}; - -/* Hash hsa_symbol. */ - -inline hashval_t -hsa_noop_symbol_hasher::hash (const value_type item) -{ - return DECL_UID (item->m_decl); -} - -/* Return true if the DECL_UIDs of decls both symbols refer to are equal. 
*/ - -inline bool -hsa_noop_symbol_hasher::equal (const value_type a, const compare_type b) -{ - return (DECL_UID (a->m_decl) == DECL_UID (b->m_decl)); -} - -/* Structure that encapsulates intermediate representation of a HSA - function. */ - -class hsa_function_representation -{ -public: - hsa_function_representation (tree fdecl, bool kernel_p, - unsigned ssa_names_count, - bool modified_cfg = false); - hsa_function_representation (hsa_internal_fn *fn); - ~hsa_function_representation (); - - /* Builds a shadow register that is utilized to a kernel dispatch. */ - hsa_op_reg *get_shadow_reg (); - - /* Return true if we are in a function that has kernel dispatch - shadow register. */ - bool has_shadow_reg_p (); - - /* The entry/exit blocks don't contain incoming code, - but the HSA generator might use them to put code into, - so we need hsa_bb instances of them. */ - void init_extra_bbs (); - - /* Update CFG dominators if m_modified_cfg flag is set. */ - void update_dominance (); - - /* Return linkage of the representation. */ - BrigLinkage8_t get_linkage (); - - /* Create a private symbol of requested TYPE. */ - hsa_symbol *create_hsa_temporary (BrigType16_t type); - - /* Lookup or create a HSA pseudo register for a given gimple SSA name. */ - hsa_op_reg *reg_for_gimple_ssa (tree ssa); - - /* Name of the function. */ - char *m_name; - - /* Number of allocated register structures. */ - int m_reg_count; - - /* Input arguments. */ - vec <hsa_symbol *> m_input_args; - - /* Output argument or NULL if there is none. */ - hsa_symbol *m_output_arg; - - /* Hash table of local variable symbols. */ - hash_table <hsa_noop_symbol_hasher> *m_local_symbols; - - /* Hash map for string constants. */ - hash_map <tree, hsa_symbol *> m_string_constants_map; - - /* Vector of pointers to spill symbols. */ - vec <class hsa_symbol *> m_spill_symbols; - - /* Vector of pointers to global variables and transformed string constants - that are used by the function. 
*/ - vec <class hsa_symbol *> m_global_symbols; - - /* Private function artificial variables. */ - vec <class hsa_symbol *> m_private_variables; - - /* Vector of called function declarations. */ - vec <tree> m_called_functions; - - /* Vector of used internal functions. */ - vec <hsa_internal_fn *> m_called_internal_fns; - - /* Number of HBB BBs. */ - int m_hbb_count; - - /* Whether or not we could check and enforce SSA properties. */ - bool m_in_ssa; - - /* True if the function is kernel function. */ - bool m_kern_p; - - /* True if the function representation is a declaration. */ - bool m_declaration_p; - - /* Function declaration tree. */ - tree m_decl; - - /* Internal function info is used for declarations of internal functions. */ - hsa_internal_fn *m_internal_fn; - - /* Runtime shadow register. */ - hsa_op_reg *m_shadow_reg; - - /* Number of kernel dispatched which take place in the function. */ - unsigned m_kernel_dispatch_count; - - /* If the function representation contains a kernel dispatch, - OMP data size is necessary memory that is used for copying before - a kernel dispatch. */ - unsigned m_maximum_omp_data_size; - - /* Return true if there's an HSA-specific warning already seen. */ - bool m_seen_error; - - /* Counter for temporary symbols created in the function representation. */ - unsigned m_temp_symbol_count; - - /* SSA names mapping. */ - vec <hsa_op_reg *> m_ssa_map; - - /* Flag whether a function needs update of dominators before RA. */ - bool m_modified_cfg; -}; - -enum hsa_function_kind -{ - HSA_INVALID, - HSA_KERNEL, - HSA_FUNCTION -}; - -class hsa_function_summary -{ -public: - /* Default constructor. */ - hsa_function_summary (); - - /* Kind of GPU/host function. */ - hsa_function_kind m_kind; - - /* Pointer to a cgraph node which is a HSA implementation of the function. - In case of the function is a HSA function, the bound function points - to the host function. 
*/ - cgraph_node *m_bound_function; - - /* Identifies if the function is an HSA function or a host function. */ - bool m_gpu_implementation_p; - - /* True if the function is a gridified kernel. */ - bool m_gridified_kernel_p; -}; - -inline -hsa_function_summary::hsa_function_summary (): m_kind (HSA_INVALID), - m_bound_function (NULL), m_gpu_implementation_p (false) -{ -} - -/* Function summary for HSA functions. */ -class hsa_summary_t: public function_summary <hsa_function_summary *> -{ -public: - hsa_summary_t (symbol_table *table): - function_summary<hsa_function_summary *> (table) - { - disable_insertion_hook (); - } - - /* Couple GPU and HOST as gpu-specific and host-specific implementation of - the same function. KIND determines whether GPU is a host-invokable kernel - or gpu-callable function and GRIDIFIED_KERNEL_P is set if the function was - gridified in OMP. */ - - void link_functions (cgraph_node *gpu, cgraph_node *host, - hsa_function_kind kind, bool gridified_kernel_p); - -private: - void process_gpu_implementation_attributes (tree gdecl); -}; - -/* OMP simple builtin describes behavior that should be done for - the routine. */ -class omp_simple_builtin -{ -public: - omp_simple_builtin (const char *name, const char *warning_message, - bool sorry, hsa_op_immed *return_value = NULL): - m_name (name), m_warning_message (warning_message), m_sorry (sorry), - m_return_value (return_value) - {} - - /* Generate HSAIL instructions for the builtin or produce warning message. */ - void generate (gimple *stmt, hsa_bb *hbb); - - /* Name of function. */ - const char *m_name; - - /* Warning message. */ - const char *m_warning_message; - - /* Flag if we should sorry after the warning message is printed. */ - bool m_sorry; - - /* Return value of the function. */ - hsa_op_immed *m_return_value; - - /* Emission function. */ - void (*m_emit_func) (gimple *stmt, hsa_bb *); -}; - -/* Class for hashing hsa_internal_fn. 
*/ - -struct hsa_internal_fn_hasher: free_ptr_hash <hsa_internal_fn> -{ - static inline hashval_t hash (const value_type); - static inline bool equal (const value_type, const compare_type); -}; - -/* Hash hsa_symbol. */ - -inline hashval_t -hsa_internal_fn_hasher::hash (const value_type item) -{ - return item->m_fn; -} - -/* Return true if the DECL_UIDs of decls both symbols refer to are equal. */ - -inline bool -hsa_internal_fn_hasher::equal (const value_type a, const compare_type b) -{ - return a->m_fn == b->m_fn && a->m_type_bit_size == b->m_type_bit_size; -} - -/* in hsa-common.c */ -extern class hsa_function_representation *hsa_cfun; -extern hash_map <tree, vec <const char *> *> *hsa_decl_kernel_dependencies; -extern hsa_summary_t *hsa_summaries; -extern hsa_symbol *hsa_num_threads; -extern unsigned hsa_kernel_calls_counter; -extern hash_set <tree> *hsa_failed_functions; -extern hash_table <hsa_noop_symbol_hasher> *hsa_global_variable_symbols; - -bool hsa_callable_function_p (tree fndecl); -void hsa_init_compilation_unit_data (void); -void hsa_deinit_compilation_unit_data (void); -bool hsa_machine_large_p (void); -bool hsa_full_profile_p (void); -bool hsa_opcode_floating_bit_insn_p (BrigOpcode16_t); -unsigned hsa_type_bit_size (BrigType16_t t); -BrigType16_t hsa_bittype_for_bitsize (unsigned bitsize); -BrigType16_t hsa_uint_for_bitsize (unsigned bitsize); -BrigType16_t hsa_float_for_bitsize (unsigned bitsize); -BrigType16_t hsa_bittype_for_type (BrigType16_t t); -BrigType16_t hsa_unsigned_type_for_type (BrigType16_t t); -bool hsa_type_packed_p (BrigType16_t type); -bool hsa_type_float_p (BrigType16_t type); -bool hsa_type_integer_p (BrigType16_t type); -bool hsa_btype_p (BrigType16_t type); -BrigAlignment8_t hsa_alignment_encoding (unsigned n); -BrigAlignment8_t hsa_natural_alignment (BrigType16_t type); -BrigAlignment8_t hsa_object_alignment (tree t); -unsigned hsa_byte_alignment (BrigAlignment8_t alignment); -void hsa_destroy_operand (hsa_op_base *op); -void 
hsa_destroy_insn (hsa_insn_basic *insn); -void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool); -unsigned hsa_get_number_decl_kernel_mappings (void); -tree hsa_get_decl_kernel_mapping_decl (unsigned i); -char *hsa_get_decl_kernel_mapping_name (unsigned i); -unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i); -bool hsa_get_decl_kernel_mapping_gridified (unsigned i); -void hsa_free_decl_kernel_mapping (void); -tree *hsa_get_ctor_statements (void); -tree *hsa_get_dtor_statements (void); -tree *hsa_get_kernel_dispatch_type (void); -void hsa_add_kernel_dependency (tree caller, const char *called_function); -void hsa_sanitize_name (char *p); -char *hsa_brig_function_name (const char *p); -const char *hsa_get_declaration_name (tree decl); -void hsa_register_kernel (cgraph_node *host); -void hsa_register_kernel (cgraph_node *gpu, cgraph_node *host); -bool hsa_seen_error (void); -void hsa_fail_cfun (void); - -/* In hsa-gen.c. */ -void hsa_build_append_simple_mov (hsa_op_reg *, hsa_op_base *, hsa_bb *); -hsa_symbol *hsa_get_spill_symbol (BrigType16_t); -hsa_symbol *hsa_get_string_cst_symbol (BrigType16_t); -hsa_op_reg *hsa_spill_in (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); -hsa_op_reg *hsa_spill_out (hsa_insn_basic *, hsa_op_reg *, hsa_op_reg **); -hsa_bb *hsa_init_new_bb (basic_block); -hsa_function_representation *hsa_generate_function_declaration (tree decl); -hsa_function_representation *hsa_generate_internal_fn_decl (hsa_internal_fn *); -tree hsa_get_host_function (tree decl); - -/* In hsa-regalloc.c. */ -void hsa_regalloc (void); - -/* In hsa-brig.c. */ -extern hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; -void hsa_brig_emit_function (void); -void hsa_output_brig (void); -unsigned hsa_get_imm_brig_type_len (BrigType16_t type); -void hsa_brig_emit_omp_symbols (void); - -/* In hsa-dump.c. 
*/ -const char *hsa_seg_name (BrigSegment8_t); -void dump_hsa_insn (FILE *f, hsa_insn_basic *insn); -void dump_hsa_bb (FILE *, hsa_bb *); -void dump_hsa_cfun (FILE *); -DEBUG_FUNCTION void debug_hsa_operand (hsa_op_base *opc); -DEBUG_FUNCTION void debug_hsa_insn (hsa_insn_basic *insn); - -union hsa_bytes -{ - uint8_t b8; - uint16_t b16; - uint32_t b32; - uint64_t b64; -}; - -/* Return true if a function DECL is an HSA implementation. */ - -static inline bool -hsa_gpu_implementation_p (tree decl) -{ - if (hsa_summaries == NULL) - return false; - - hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); - return s != NULL && s->m_gpu_implementation_p; -} - -#endif /* HSA_H */ diff --git a/gcc/hsa-dump.c b/gcc/hsa-dump.c deleted file mode 100644 index 23aa4c9..0000000 --- a/gcc/hsa-dump.c +++ /dev/null @@ -1,1278 +0,0 @@ -/* Infrastructure to dump our HSAIL IL - Copyright (C) 2013-2020 Free Software Foundation, Inc. - Contributed by Martin Jambor <mjambor@suse.cz> and - Martin Liska <mliska@suse.cz>. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. 
*/ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "is-a.h" -#include "vec.h" -#include "tree.h" -#include "basic-block.h" -#include "function.h" -#include "cfg.h" -#include "dumpfile.h" -#include "gimple-pretty-print.h" -#include "cgraph.h" -#include "print-tree.h" -#include "alloc-pool.h" -#include "symbol-summary.h" -#include "hsa-common.h" - -/* Return textual name of TYPE. */ - -static const char * -hsa_type_name (BrigType16_t type) -{ - switch (type) - { - case BRIG_TYPE_NONE: - return "none"; - case BRIG_TYPE_U8: - return "u8"; - case BRIG_TYPE_U16: - return "u16"; - case BRIG_TYPE_U32: - return "u32"; - case BRIG_TYPE_U64: - return "u64"; - case BRIG_TYPE_S8: - return "s8"; - case BRIG_TYPE_S16: - return "s16"; - case BRIG_TYPE_S32: - return "s32"; - case BRIG_TYPE_S64: - return "s64"; - case BRIG_TYPE_F16: - return "f16"; - case BRIG_TYPE_F32: - return "f32"; - case BRIG_TYPE_F64: - return "f64"; - case BRIG_TYPE_B1: - return "b1"; - case BRIG_TYPE_B8: - return "b8"; - case BRIG_TYPE_B16: - return "b16"; - case BRIG_TYPE_B32: - return "b32"; - case BRIG_TYPE_B64: - return "b64"; - case BRIG_TYPE_B128: - return "b128"; - case BRIG_TYPE_SAMP: - return "samp"; - case BRIG_TYPE_ROIMG: - return "roimg"; - case BRIG_TYPE_WOIMG: - return "woimg"; - case BRIG_TYPE_RWIMG: - return "rwimg"; - case BRIG_TYPE_SIG32: - return "sig32"; - case BRIG_TYPE_SIG64: - return "sig64"; - case BRIG_TYPE_U8X4: - return "u8x4"; - case BRIG_TYPE_U8X8: - return "u8x8"; - case BRIG_TYPE_U8X16: - return "u8x16"; - case BRIG_TYPE_U16X2: - return "u16x2"; - case BRIG_TYPE_U16X4: - return "u16x4"; - case BRIG_TYPE_U16X8: - return "u16x8"; - case BRIG_TYPE_U32X2: - return "u32x2"; - case BRIG_TYPE_U32X4: - return "u32x4"; - case BRIG_TYPE_U64X2: - return "u64x2"; - case BRIG_TYPE_S8X4: - return "s8x4"; - case BRIG_TYPE_S8X8: - return "s8x8"; - case BRIG_TYPE_S8X16: - return "s8x16"; - case BRIG_TYPE_S16X2: - return "s16x2"; - case 
BRIG_TYPE_S16X4: - return "s16x4"; - case BRIG_TYPE_S16X8: - return "s16x8"; - case BRIG_TYPE_S32X2: - return "s32x2"; - case BRIG_TYPE_S32X4: - return "s32x4"; - case BRIG_TYPE_S64X2: - return "s64x2"; - case BRIG_TYPE_F16X2: - return "f16x2"; - case BRIG_TYPE_F16X4: - return "f16x4"; - case BRIG_TYPE_F16X8: - return "f16x8"; - case BRIG_TYPE_F32X2: - return "f32x2"; - case BRIG_TYPE_F32X4: - return "f32x4"; - case BRIG_TYPE_F64X2: - return "f64x2"; - default: - return "UNKNOWN_TYPE"; - } -} - -/* Return textual name of OPCODE. */ - -static const char * -hsa_opcode_name (BrigOpcode16_t opcode) -{ - switch (opcode) - { - case BRIG_OPCODE_NOP: - return "nop"; - case BRIG_OPCODE_ABS: - return "abs"; - case BRIG_OPCODE_ADD: - return "add"; - case BRIG_OPCODE_BORROW: - return "borrow"; - case BRIG_OPCODE_CARRY: - return "carry"; - case BRIG_OPCODE_CEIL: - return "ceil"; - case BRIG_OPCODE_COPYSIGN: - return "copysign"; - case BRIG_OPCODE_DIV: - return "div"; - case BRIG_OPCODE_FLOOR: - return "floor"; - case BRIG_OPCODE_FMA: - return "fma"; - case BRIG_OPCODE_FRACT: - return "fract"; - case BRIG_OPCODE_MAD: - return "mad"; - case BRIG_OPCODE_MAX: - return "max"; - case BRIG_OPCODE_MIN: - return "min"; - case BRIG_OPCODE_MUL: - return "mul"; - case BRIG_OPCODE_MULHI: - return "mulhi"; - case BRIG_OPCODE_NEG: - return "neg"; - case BRIG_OPCODE_REM: - return "rem"; - case BRIG_OPCODE_RINT: - return "rint"; - case BRIG_OPCODE_SQRT: - return "sqrt"; - case BRIG_OPCODE_SUB: - return "sub"; - case BRIG_OPCODE_TRUNC: - return "trunc"; - case BRIG_OPCODE_MAD24: - return "mad24"; - case BRIG_OPCODE_MAD24HI: - return "mad24hi"; - case BRIG_OPCODE_MUL24: - return "mul24"; - case BRIG_OPCODE_MUL24HI: - return "mul24hi"; - case BRIG_OPCODE_SHL: - return "shl"; - case BRIG_OPCODE_SHR: - return "shr"; - case BRIG_OPCODE_AND: - return "and"; - case BRIG_OPCODE_NOT: - return "not"; - case BRIG_OPCODE_OR: - return "or"; - case BRIG_OPCODE_POPCOUNT: - return "popcount"; - case 
BRIG_OPCODE_XOR: - return "xor"; - case BRIG_OPCODE_BITEXTRACT: - return "bitextract"; - case BRIG_OPCODE_BITINSERT: - return "bitinsert"; - case BRIG_OPCODE_BITMASK: - return "bitmask"; - case BRIG_OPCODE_BITREV: - return "bitrev"; - case BRIG_OPCODE_BITSELECT: - return "bitselect"; - case BRIG_OPCODE_FIRSTBIT: - return "firstbit"; - case BRIG_OPCODE_LASTBIT: - return "lastbit"; - case BRIG_OPCODE_COMBINE: - return "combine"; - case BRIG_OPCODE_EXPAND: - return "expand"; - case BRIG_OPCODE_LDA: - return "lda"; - case BRIG_OPCODE_MOV: - return "mov"; - case BRIG_OPCODE_SHUFFLE: - return "shuffle"; - case BRIG_OPCODE_UNPACKHI: - return "unpackhi"; - case BRIG_OPCODE_UNPACKLO: - return "unpacklo"; - case BRIG_OPCODE_PACK: - return "pack"; - case BRIG_OPCODE_UNPACK: - return "unpack"; - case BRIG_OPCODE_CMOV: - return "cmov"; - case BRIG_OPCODE_CLASS: - return "class"; - case BRIG_OPCODE_NCOS: - return "ncos"; - case BRIG_OPCODE_NEXP2: - return "nexp2"; - case BRIG_OPCODE_NFMA: - return "nfma"; - case BRIG_OPCODE_NLOG2: - return "nlog2"; - case BRIG_OPCODE_NRCP: - return "nrcp"; - case BRIG_OPCODE_NRSQRT: - return "nrsqrt"; - case BRIG_OPCODE_NSIN: - return "nsin"; - case BRIG_OPCODE_NSQRT: - return "nsqrt"; - case BRIG_OPCODE_BITALIGN: - return "bitalign"; - case BRIG_OPCODE_BYTEALIGN: - return "bytealign"; - case BRIG_OPCODE_PACKCVT: - return "packcvt"; - case BRIG_OPCODE_UNPACKCVT: - return "unpackcvt"; - case BRIG_OPCODE_LERP: - return "lerp"; - case BRIG_OPCODE_SAD: - return "sad"; - case BRIG_OPCODE_SADHI: - return "sadhi"; - case BRIG_OPCODE_SEGMENTP: - return "segmentp"; - case BRIG_OPCODE_FTOS: - return "ftos"; - case BRIG_OPCODE_STOF: - return "stof"; - case BRIG_OPCODE_CMP: - return "cmp"; - case BRIG_OPCODE_CVT: - return "cvt"; - case BRIG_OPCODE_LD: - return "ld"; - case BRIG_OPCODE_ST: - return "st"; - case BRIG_OPCODE_ATOMIC: - return "atomic"; - case BRIG_OPCODE_ATOMICNORET: - return "atomicnoret"; - case BRIG_OPCODE_SIGNAL: - return "signal"; - case 
BRIG_OPCODE_SIGNALNORET: - return "signalnoret"; - case BRIG_OPCODE_MEMFENCE: - return "memfence"; - case BRIG_OPCODE_RDIMAGE: - return "rdimage"; - case BRIG_OPCODE_LDIMAGE: - return "ldimage"; - case BRIG_OPCODE_STIMAGE: - return "stimage"; - case BRIG_OPCODE_QUERYIMAGE: - return "queryimage"; - case BRIG_OPCODE_QUERYSAMPLER: - return "querysampler"; - case BRIG_OPCODE_CBR: - return "cbr"; - case BRIG_OPCODE_BR: - return "br"; - case BRIG_OPCODE_SBR: - return "sbr"; - case BRIG_OPCODE_BARRIER: - return "barrier"; - case BRIG_OPCODE_WAVEBARRIER: - return "wavebarrier"; - case BRIG_OPCODE_ARRIVEFBAR: - return "arrivefbar"; - case BRIG_OPCODE_INITFBAR: - return "initfbar"; - case BRIG_OPCODE_JOINFBAR: - return "joinfbar"; - case BRIG_OPCODE_LEAVEFBAR: - return "leavefbar"; - case BRIG_OPCODE_RELEASEFBAR: - return "releasefbar"; - case BRIG_OPCODE_WAITFBAR: - return "waitfbar"; - case BRIG_OPCODE_LDF: - return "ldf"; - case BRIG_OPCODE_ACTIVELANECOUNT: - return "activelanecount"; - case BRIG_OPCODE_ACTIVELANEID: - return "activelaneid"; - case BRIG_OPCODE_ACTIVELANEMASK: - return "activelanemask"; - case BRIG_OPCODE_CALL: - return "call"; - case BRIG_OPCODE_SCALL: - return "scall"; - case BRIG_OPCODE_ICALL: - return "icall"; - case BRIG_OPCODE_RET: - return "ret"; - case BRIG_OPCODE_ALLOCA: - return "alloca"; - case BRIG_OPCODE_CURRENTWORKGROUPSIZE: - return "currentworkgroupsize"; - case BRIG_OPCODE_DIM: - return "dim"; - case BRIG_OPCODE_GRIDGROUPS: - return "gridgroups"; - case BRIG_OPCODE_GRIDSIZE: - return "gridsize"; - case BRIG_OPCODE_PACKETCOMPLETIONSIG: - return "packetcompletionsig"; - case BRIG_OPCODE_PACKETID: - return "packetid"; - case BRIG_OPCODE_WORKGROUPID: - return "workgroupid"; - case BRIG_OPCODE_WORKGROUPSIZE: - return "workgroupsize"; - case BRIG_OPCODE_WORKITEMABSID: - return "workitemabsid"; - case BRIG_OPCODE_WORKITEMFLATABSID: - return "workitemflatabsid"; - case BRIG_OPCODE_WORKITEMFLATID: - return "workitemflatid"; - case 
BRIG_OPCODE_WORKITEMID: - return "workitemid"; - case BRIG_OPCODE_CLEARDETECTEXCEPT: - return "cleardetectexcept"; - case BRIG_OPCODE_GETDETECTEXCEPT: - return "getdetectexcept"; - case BRIG_OPCODE_SETDETECTEXCEPT: - return "setdetectexcept"; - case BRIG_OPCODE_ADDQUEUEWRITEINDEX: - return "addqueuewriteindex"; - case BRIG_OPCODE_CASQUEUEWRITEINDEX: - return "casqueuewriteindex"; - case BRIG_OPCODE_LDQUEUEREADINDEX: - return "ldqueuereadindex"; - case BRIG_OPCODE_LDQUEUEWRITEINDEX: - return "ldqueuewriteindex"; - case BRIG_OPCODE_STQUEUEREADINDEX: - return "stqueuereadindex"; - case BRIG_OPCODE_STQUEUEWRITEINDEX: - return "stqueuewriteindex"; - case BRIG_OPCODE_CLOCK: - return "clock"; - case BRIG_OPCODE_CUID: - return "cuid"; - case BRIG_OPCODE_DEBUGTRAP: - return "debugtrap"; - case BRIG_OPCODE_GROUPBASEPTR: - return "groupbaseptr"; - case BRIG_OPCODE_KERNARGBASEPTR: - return "kernargbaseptr"; - case BRIG_OPCODE_LANEID: - return "laneid"; - case BRIG_OPCODE_MAXCUID: - return "maxcuid"; - case BRIG_OPCODE_MAXWAVEID: - return "maxwaveid"; - case BRIG_OPCODE_NULLPTR: - return "nullptr"; - case BRIG_OPCODE_WAVEID: - return "waveid"; - default: - return "UNKNOWN_OPCODE"; - } -} - -/* Return textual name of SEG. */ - -const char * -hsa_seg_name (BrigSegment8_t seg) -{ - switch (seg) - { - case BRIG_SEGMENT_NONE: - return "none"; - case BRIG_SEGMENT_FLAT: - return "flat"; - case BRIG_SEGMENT_GLOBAL: - return "global"; - case BRIG_SEGMENT_READONLY: - return "readonly"; - case BRIG_SEGMENT_KERNARG: - return "kernarg"; - case BRIG_SEGMENT_GROUP: - return "group"; - case BRIG_SEGMENT_PRIVATE: - return "private"; - case BRIG_SEGMENT_SPILL: - return "spill"; - case BRIG_SEGMENT_ARG: - return "arg"; - default: - return "UNKNOWN_SEGMENT"; - } -} - -/* Return textual name of CMPOP. 
*/ - -static const char * -hsa_cmpop_name (BrigCompareOperation8_t cmpop) -{ - switch (cmpop) - { - case BRIG_COMPARE_EQ: - return "eq"; - case BRIG_COMPARE_NE: - return "ne"; - case BRIG_COMPARE_LT: - return "lt"; - case BRIG_COMPARE_LE: - return "le"; - case BRIG_COMPARE_GT: - return "gt"; - case BRIG_COMPARE_GE: - return "ge"; - case BRIG_COMPARE_EQU: - return "equ"; - case BRIG_COMPARE_NEU: - return "neu"; - case BRIG_COMPARE_LTU: - return "ltu"; - case BRIG_COMPARE_LEU: - return "leu"; - case BRIG_COMPARE_GTU: - return "gtu"; - case BRIG_COMPARE_GEU: - return "geu"; - case BRIG_COMPARE_NUM: - return "num"; - case BRIG_COMPARE_NAN: - return "nan"; - case BRIG_COMPARE_SEQ: - return "seq"; - case BRIG_COMPARE_SNE: - return "sne"; - case BRIG_COMPARE_SLT: - return "slt"; - case BRIG_COMPARE_SLE: - return "sle"; - case BRIG_COMPARE_SGT: - return "sgt"; - case BRIG_COMPARE_SGE: - return "sge"; - case BRIG_COMPARE_SGEU: - return "sgeu"; - case BRIG_COMPARE_SEQU: - return "sequ"; - case BRIG_COMPARE_SNEU: - return "sneu"; - case BRIG_COMPARE_SLTU: - return "sltu"; - case BRIG_COMPARE_SLEU: - return "sleu"; - case BRIG_COMPARE_SNUM: - return "snum"; - case BRIG_COMPARE_SNAN: - return "snan"; - case BRIG_COMPARE_SGTU: - return "sgtu"; - default: - return "UNKNOWN_COMPARISON"; - } -} - -/* Return textual name for memory order. */ - -static const char * -hsa_memsem_name (enum BrigMemoryOrder mo) -{ - switch (mo) - { - case BRIG_MEMORY_ORDER_NONE: - return ""; - case BRIG_MEMORY_ORDER_RELAXED: - return "rlx"; - case BRIG_MEMORY_ORDER_SC_ACQUIRE: - return "scacq"; - case BRIG_MEMORY_ORDER_SC_RELEASE: - return "screl"; - case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: - return "scar"; - default: - return "UNKNOWN_MEMORY_ORDER"; - } -} - -/* Return textual name for memory scope. 
*/ - -static const char * -hsa_memscope_name (enum BrigMemoryScope scope) -{ - switch (scope) - { - case BRIG_MEMORY_SCOPE_NONE: - return ""; - case BRIG_MEMORY_SCOPE_WORKITEM: - return "wi"; - case BRIG_MEMORY_SCOPE_WAVEFRONT: - return "wave"; - case BRIG_MEMORY_SCOPE_WORKGROUP: - return "wg"; - case BRIG_MEMORY_SCOPE_AGENT: - return "agent"; - case BRIG_MEMORY_SCOPE_SYSTEM: - return "sys"; - default: - return "UNKNOWN_SCOPE"; - } -} - -/* Return textual name for atomic operation. */ - -static const char * -hsa_m_atomicop_name (enum BrigAtomicOperation op) -{ - switch (op) - { - case BRIG_ATOMIC_ADD: - return "add"; - case BRIG_ATOMIC_AND: - return "and"; - case BRIG_ATOMIC_CAS: - return "cas"; - case BRIG_ATOMIC_EXCH: - return "exch"; - case BRIG_ATOMIC_LD: - return "ld"; - case BRIG_ATOMIC_MAX: - return "max"; - case BRIG_ATOMIC_MIN: - return "min"; - case BRIG_ATOMIC_OR: - return "or"; - case BRIG_ATOMIC_ST: - return "st"; - case BRIG_ATOMIC_SUB: - return "sub"; - case BRIG_ATOMIC_WRAPDEC: - return "wrapdec"; - case BRIG_ATOMIC_WRAPINC: - return "wrapinc"; - case BRIG_ATOMIC_XOR: - return "xor"; - case BRIG_ATOMIC_WAIT_EQ: - return "wait_eq"; - case BRIG_ATOMIC_WAIT_NE: - return "wait_ne"; - case BRIG_ATOMIC_WAIT_LT: - return "wait_lt"; - case BRIG_ATOMIC_WAIT_GTE: - return "wait_gte"; - case BRIG_ATOMIC_WAITTIMEOUT_EQ: - return "waittimeout_eq"; - case BRIG_ATOMIC_WAITTIMEOUT_NE: - return "waittimeout_ne"; - case BRIG_ATOMIC_WAITTIMEOUT_LT: - return "waittimeout_lt"; - case BRIG_ATOMIC_WAITTIMEOUT_GTE: - return "waittimeout_gte"; - default: - return "UNKNOWN_ATOMIC_OP"; - } -} - -/* Return textual name for atomic operation. 
*/ - -static const char * -hsa_width_specifier_name (BrigWidth8_t width) -{ - switch (width) - { - case BRIG_WIDTH_NONE: - return "none"; - case BRIG_WIDTH_1: - return "1"; - case BRIG_WIDTH_2: - return "2"; - case BRIG_WIDTH_4: - return "4"; - case BRIG_WIDTH_8: - return "8"; - case BRIG_WIDTH_16: - return "16"; - case BRIG_WIDTH_32: - return "32"; - case BRIG_WIDTH_64: - return "64"; - case BRIG_WIDTH_128: - return "128"; - case BRIG_WIDTH_256: - return "256"; - case BRIG_WIDTH_512: - return "512"; - case BRIG_WIDTH_1024: - return "1024"; - case BRIG_WIDTH_2048: - return "2048"; - case BRIG_WIDTH_4096: - return "4096"; - case BRIG_WIDTH_8192: - return "8192"; - case BRIG_WIDTH_16384: - return "16384"; - case BRIG_WIDTH_32768: - return "32768"; - case BRIG_WIDTH_65536: - return "65536"; - case BRIG_WIDTH_131072: - return "131072"; - case BRIG_WIDTH_262144: - return "262144"; - case BRIG_WIDTH_524288: - return "524288"; - case BRIG_WIDTH_1048576: - return "1048576"; - case BRIG_WIDTH_2097152: - return "2097152"; - case BRIG_WIDTH_4194304: - return "4194304"; - case BRIG_WIDTH_8388608: - return "8388608"; - case BRIG_WIDTH_16777216: - return "16777216"; - case BRIG_WIDTH_33554432: - return "33554432"; - case BRIG_WIDTH_67108864: - return "67108864"; - case BRIG_WIDTH_134217728: - return "134217728"; - case BRIG_WIDTH_268435456: - return "268435456"; - case BRIG_WIDTH_536870912: - return "536870912"; - case BRIG_WIDTH_1073741824: - return "1073741824"; - case BRIG_WIDTH_2147483648: - return "2147483648"; - case BRIG_WIDTH_WAVESIZE: - return "wavesize"; - case BRIG_WIDTH_ALL: - return "all"; - default: - return "UNKNOWN_WIDTH"; - } -} - -/* Dump textual representation of HSA IL register REG to file F. 
*/ - -static void -dump_hsa_reg (FILE *f, hsa_op_reg *reg, bool dump_type = false) -{ - if (reg->m_reg_class) - fprintf (f, "$%c%i", reg->m_reg_class, reg->m_hard_num); - else - fprintf (f, "$_%i", reg->m_order); - if (dump_type) - fprintf (f, " (%s)", hsa_type_name (reg->m_type)); -} - -/* Dump textual representation of HSA IL immediate operand IMM to file F. */ - -static void -dump_hsa_immed (FILE *f, hsa_op_immed *imm) -{ - bool unsigned_int_type - = (BRIG_TYPE_U8 | BRIG_TYPE_U16 | BRIG_TYPE_U32 | BRIG_TYPE_U64) - & imm->m_type; - - if (imm->m_tree_value) - print_generic_expr (f, imm->m_tree_value); - else - { - if (unsigned_int_type) - fprintf (f, HOST_WIDE_INT_PRINT_DEC, imm->m_int_value); - else - fprintf (f, HOST_WIDE_INT_PRINT_UNSIGNED, - (unsigned HOST_WIDE_INT) imm->m_int_value); - } - - fprintf (f, " (%s)", hsa_type_name (imm->m_type)); -} - -/* Dump textual representation of HSA IL address operand ADDR to file F. */ - -static void -dump_hsa_address (FILE *f, hsa_op_address *addr) -{ - bool sth = false; - - if (addr->m_symbol) - { - sth = true; - if (addr->m_symbol->m_name) - fprintf (f, "[%%%s]", addr->m_symbol->m_name); - else - fprintf (f, "[%%__%s_%i]", hsa_seg_name (addr->m_symbol->m_segment), - addr->m_symbol->m_name_number); - } - - if (addr->m_reg) - { - fprintf (f, "["); - dump_hsa_reg (f, addr->m_reg); - if (addr->m_imm_offset != 0) - fprintf (f, " + " HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset); - else - fprintf (f, "]"); - } - else if (!sth || addr->m_imm_offset != 0) - fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]", addr->m_imm_offset); -} - -/* Dump textual representation of HSA IL symbol SYMBOL to file F. 
*/ - -static void -dump_hsa_symbol (FILE *f, hsa_symbol *symbol) -{ - const char *name; - char buf[64]; - if (symbol->m_name) - name = symbol->m_name; - else - { - sprintf (buf, "__%s_%i", hsa_seg_name (symbol->m_segment), - symbol->m_name_number); - - name = buf; - } - - fprintf (f, "align(%u) %s_%s %s", hsa_byte_alignment (symbol->m_align), - hsa_seg_name (symbol->m_segment), - hsa_type_name (symbol->m_type & ~BRIG_TYPE_ARRAY_MASK), name); - - if (symbol->m_type & BRIG_TYPE_ARRAY_MASK) - fprintf (f, "[%lu]", (unsigned long) symbol->m_dim); - - if (symbol->m_directive_offset) - fprintf (f, " /* BRIG offset: %u */", symbol->m_directive_offset); -} - -/* Dump textual representation of HSA IL operand OP to file F. */ - -static void -dump_hsa_operand (FILE *f, hsa_op_base *op, bool dump_reg_type = false) -{ - if (is_a <hsa_op_immed *> (op)) - dump_hsa_immed (f, as_a <hsa_op_immed *> (op)); - else if (is_a <hsa_op_reg *> (op)) - dump_hsa_reg (f, as_a <hsa_op_reg *> (op), dump_reg_type); - else if (is_a <hsa_op_address *> (op)) - dump_hsa_address (f, as_a <hsa_op_address *> (op)); - else - fprintf (f, "UNKNOWN_OP_KIND"); -} - -/* Dump textual representation of HSA IL operands in VEC to file F. */ - -static void -dump_hsa_operands (FILE *f, hsa_insn_basic *insn, int start = 0, - int end = -1, bool dump_reg_type = false) -{ - if (end == -1) - end = insn->operand_count (); - - for (int i = start; i < end; i++) - { - dump_hsa_operand (f, insn->get_op (i), dump_reg_type); - if (i != end - 1) - fprintf (f, ", "); - } -} - -/* Indent F stream with INDENT spaces. */ - -static void indent_stream (FILE *f, int indent) -{ - for (int i = 0; i < indent; i++) - fputc (' ', f); -} - -/* Dump textual representation of HSA IL instruction INSN to file F. Prepend - the instruction with *INDENT spaces and adjust the indentation for call - instructions as appropriate. 
*/ - -static void -dump_hsa_insn_1 (FILE *f, hsa_insn_basic *insn, int *indent) -{ - gcc_checking_assert (insn); - - if (insn->m_number) - fprintf (f, "%5d: ", insn->m_number); - - indent_stream (f, *indent); - - if (is_a <hsa_insn_phi *> (insn)) - { - hsa_insn_phi *phi = as_a <hsa_insn_phi *> (insn); - bool first = true; - dump_hsa_reg (f, phi->m_dest, true); - fprintf (f, " = PHI <"); - unsigned count = phi->operand_count (); - for (unsigned i = 0; i < count; i++) - { - if (!phi->get_op (i)) - break; - if (!first) - fprintf (f, ", "); - else - first = false; - dump_hsa_operand (f, phi->get_op (i), true); - } - fprintf (f, ">"); - } - else if (is_a <hsa_insn_signal *> (insn)) - { - hsa_insn_signal *mem = as_a <hsa_insn_signal *> (insn); - - fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); - fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_signalop)); - if (mem->m_memory_order != BRIG_MEMORY_ORDER_NONE) - fprintf (f, "_%s", hsa_memsem_name (mem->m_memory_order)); - fprintf (f, "_%s ", hsa_type_name (mem->m_type)); - - dump_hsa_operands (f, mem); - } - - else if (is_a <hsa_insn_atomic *> (insn)) - { - hsa_insn_atomic *mem = as_a <hsa_insn_atomic *> (insn); - - /* Either operand[0] or operand[1] must be an address operand. 
*/ - hsa_op_address *addr = NULL; - if (is_a <hsa_op_address *> (mem->get_op (0))) - addr = as_a <hsa_op_address *> (mem->get_op (0)); - else - addr = as_a <hsa_op_address *> (mem->get_op (1)); - - fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); - fprintf (f, "_%s", hsa_m_atomicop_name (mem->m_atomicop)); - if (addr->m_symbol) - fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); - if (mem->m_memoryorder != BRIG_MEMORY_ORDER_NONE) - fprintf (f, "_%s", hsa_memsem_name (mem->m_memoryorder)); - if (mem->m_memoryscope != BRIG_MEMORY_SCOPE_NONE) - fprintf (f, "_%s", hsa_memscope_name (mem->m_memoryscope)); - fprintf (f, "_%s ", hsa_type_name (mem->m_type)); - - dump_hsa_operands (f, mem); - } - else if (is_a <hsa_insn_mem *> (insn)) - { - hsa_insn_mem *mem = as_a <hsa_insn_mem *> (insn); - hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); - - fprintf (f, "%s", hsa_opcode_name (mem->m_opcode)); - if (addr->m_symbol) - fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); - if (mem->m_align != BRIG_ALIGNMENT_NONE) - fprintf (f, "_align(%u)", hsa_byte_alignment (mem->m_align)); - if (mem->m_equiv_class != 0) - fprintf (f, "_equiv(%i)", mem->m_equiv_class); - fprintf (f, "_%s ", hsa_type_name (mem->m_type)); - - dump_hsa_operand (f, mem->get_op (0)); - fprintf (f, ", "); - dump_hsa_address (f, addr); - } - else if (insn->m_opcode == BRIG_OPCODE_LDA) - { - hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); - - fprintf (f, "%s", hsa_opcode_name (insn->m_opcode)); - if (addr->m_symbol) - fprintf (f, "_%s", hsa_seg_name (addr->m_symbol->m_segment)); - fprintf (f, "_%s ", hsa_type_name (insn->m_type)); - - dump_hsa_operand (f, insn->get_op (0)); - fprintf (f, ", "); - dump_hsa_address (f, addr); - } - else if (is_a <hsa_insn_seg *> (insn)) - { - hsa_insn_seg *seg = as_a <hsa_insn_seg *> (insn); - fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (seg->m_opcode), - hsa_seg_name (seg->m_segment), - hsa_type_name (seg->m_type), 
hsa_type_name (seg->m_src_type)); - dump_hsa_reg (f, as_a <hsa_op_reg *> (seg->get_op (0))); - fprintf (f, ", "); - dump_hsa_operand (f, seg->get_op (1)); - } - else if (is_a <hsa_insn_cmp *> (insn)) - { - hsa_insn_cmp *cmp = as_a <hsa_insn_cmp *> (insn); - BrigType16_t src_type; - - if (is_a <hsa_op_reg *> (cmp->get_op (1))) - src_type = as_a <hsa_op_reg *> (cmp->get_op (1))->m_type; - else - src_type = as_a <hsa_op_immed *> (cmp->get_op (1))->m_type; - - fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (cmp->m_opcode), - hsa_cmpop_name (cmp->m_compare), - hsa_type_name (cmp->m_type), hsa_type_name (src_type)); - dump_hsa_reg (f, as_a <hsa_op_reg *> (cmp->get_op (0))); - fprintf (f, ", "); - dump_hsa_operand (f, cmp->get_op (1)); - fprintf (f, ", "); - dump_hsa_operand (f, cmp->get_op (2)); - } - else if (is_a <hsa_insn_cbr *> (insn)) - { - hsa_insn_cbr *br = as_a <hsa_insn_cbr *> (insn); - basic_block target = NULL; - edge_iterator ei; - edge e; - - fprintf (f, "%s ", hsa_opcode_name (br->m_opcode)); - if (br->m_opcode == BRIG_OPCODE_CBR) - { - dump_hsa_reg (f, as_a <hsa_op_reg *> (br->get_op (0))); - fprintf (f, ", "); - } - - FOR_EACH_EDGE (e, ei, br->m_bb->succs) - if (e->flags & EDGE_TRUE_VALUE) - { - target = e->dest; - break; - } - fprintf (f, "BB %i", hsa_bb_for_bb (target)->m_index); - } - else if (is_a <hsa_insn_sbr *> (insn)) - { - hsa_insn_sbr *sbr = as_a <hsa_insn_sbr *> (insn); - - fprintf (f, "%s ", hsa_opcode_name (sbr->m_opcode)); - dump_hsa_reg (f, as_a <hsa_op_reg *> (sbr->get_op (0))); - fprintf (f, ", ["); - - for (unsigned i = 0; i < sbr->m_jump_table.length (); i++) - { - fprintf (f, "BB %i", hsa_bb_for_bb (sbr->m_jump_table[i])->m_index); - if (i != sbr->m_jump_table.length () - 1) - fprintf (f, ", "); - } - } - else if (is_a <hsa_insn_br *> (insn)) - { - hsa_insn_br *br = as_a <hsa_insn_br *> (insn); - fprintf (f, "%s_width(%s) ", hsa_opcode_name (br->m_opcode), - hsa_width_specifier_name (br->m_width)); - } - else if (is_a <hsa_insn_arg_block 
*> (insn)) - { - hsa_insn_arg_block *arg_block = as_a <hsa_insn_arg_block *> (insn); - bool start_p = arg_block->m_kind == BRIG_KIND_DIRECTIVE_ARG_BLOCK_START; - char c = start_p ? '{' : '}'; - - if (start_p) - { - *indent += 2; - indent_stream (f, 2); - } - - if (!start_p) - *indent -= 2; - - fprintf (f, "%c", c); - } - else if (is_a <hsa_insn_call *> (insn)) - { - hsa_insn_call *call = as_a <hsa_insn_call *> (insn); - if (call->m_called_function) - { - const char *name = hsa_get_declaration_name (call->m_called_function); - fprintf (f, "call &%s", name); - } - else - { - char *name = call->m_called_internal_fn->name (); - fprintf (f, "call &%s", name); - free (name); - } - - if (call->m_output_arg) - fprintf (f, "(%%res) "); - - fprintf (f, "("); - for (unsigned i = 0; i < call->m_input_args.length (); i++) - { - fprintf (f, "%%__arg_%u", i); - - if (i != call->m_input_args.length () - 1) - fprintf (f, ", "); - } - fprintf (f, ")"); - } - else if (is_a <hsa_insn_comment *> (insn)) - { - hsa_insn_comment *c = as_a <hsa_insn_comment *> (insn); - fprintf (f, "%s", c->m_comment); - } - else if (is_a <hsa_insn_srctype *> (insn)) - { - hsa_insn_srctype *srctype = as_a <hsa_insn_srctype *> (insn); - - fprintf (f, "%s_%s_%s ", hsa_opcode_name (srctype->m_opcode), - hsa_type_name (srctype->m_type), - hsa_type_name (srctype->m_source_type)); - - dump_hsa_operands (f, insn); - } - else if (is_a <hsa_insn_packed *> (insn)) - { - hsa_insn_packed *packed = as_a <hsa_insn_packed *> (insn); - - fprintf (f, "%s_v%u_%s_%s ", hsa_opcode_name (packed->m_opcode), - packed->operand_count () - 1, - hsa_type_name (packed->m_type), - hsa_type_name (packed->m_source_type)); - - if (packed->m_opcode == BRIG_OPCODE_COMBINE) - { - dump_hsa_operand (f, insn->get_op (0)); - fprintf (f, ", ("); - dump_hsa_operands (f, insn, 1); - fprintf (f, ")"); - } - else if (packed->m_opcode == BRIG_OPCODE_EXPAND) - { - fprintf (f, "("); - dump_hsa_operands (f, insn, 0, insn->operand_count () - 1); - 
fprintf (f, "), "); - dump_hsa_operand (f, insn->get_op (insn->operand_count () - 1)); - - } - else - gcc_unreachable (); - } - else if (is_a <hsa_insn_alloca *> (insn)) - { - hsa_insn_alloca *alloca = as_a <hsa_insn_alloca *> (insn); - - fprintf (f, "%s_align(%u)_%s ", hsa_opcode_name (insn->m_opcode), - hsa_byte_alignment (alloca->m_align), - hsa_type_name (insn->m_type)); - - dump_hsa_operands (f, insn); - } - else if (hsa_insn_queue *qi = dyn_cast <hsa_insn_queue *> (insn)) - { - fprintf (f, "%s_%s_%s_%s ", hsa_opcode_name (qi->m_opcode), - hsa_seg_name (qi->m_segment), - hsa_memsem_name (qi->m_memory_order), - hsa_type_name (qi->m_type)); - - dump_hsa_operands (f, qi); - } - else - { - fprintf (f, "%s_%s ", hsa_opcode_name (insn->m_opcode), - hsa_type_name (insn->m_type)); - - dump_hsa_operands (f, insn); - } - - if (insn->m_brig_offset) - { - fprintf (f, " /* BRIG offset: %u", insn->m_brig_offset); - - for (unsigned i = 0; i < insn->operand_count (); i++) - fprintf (f, ", op%u: %u", i, insn->get_op (i)->m_brig_op_offset); - - fprintf (f, " */"); - } - - fprintf (f, "\n"); -} - -/* Dump textual representation of HSA IL instruction INSN to file F. */ - -void -dump_hsa_insn (FILE *f, hsa_insn_basic *insn) -{ - int indent = 0; - dump_hsa_insn_1 (f, insn, &indent); -} - -/* Dump textual representation of HSA IL in HBB to file F. 
*/ - -void -dump_hsa_bb (FILE *f, hsa_bb *hbb) -{ - hsa_insn_basic *insn; - edge_iterator ei; - edge e; - basic_block true_bb = NULL, other = NULL; - - fprintf (f, "BB %i:\n", hbb->m_index); - - int indent = 2; - for (insn = hbb->m_first_phi; insn; insn = insn->m_next) - dump_hsa_insn_1 (f, insn, &indent); - - for (insn = hbb->m_first_insn; insn; insn = insn->m_next) - dump_hsa_insn_1 (f, insn, &indent); - - if (hbb->m_last_insn && is_a <hsa_insn_sbr *> (hbb->m_last_insn)) - goto exit; - - FOR_EACH_EDGE (e, ei, hbb->m_bb->succs) - if (e->flags & EDGE_TRUE_VALUE) - { - gcc_assert (!true_bb); - true_bb = e->dest; - } - else - { - gcc_assert (!other); - other = e->dest; - } - - if (true_bb) - { - if (!hbb->m_last_insn - || hbb->m_last_insn->m_opcode != BRIG_OPCODE_CBR) - fprintf (f, "WARNING: No branch insn for a true edge. \n"); - } - else if (hbb->m_last_insn - && hbb->m_last_insn->m_opcode == BRIG_OPCODE_CBR) - fprintf (f, "WARNING: No true edge for a cbr statement\n"); - - if (other && other->aux) - fprintf (f, " Fall-through to BB %i\n", - hsa_bb_for_bb (other)->m_index); - else if (hbb->m_last_insn - && hbb->m_last_insn->m_opcode != BRIG_OPCODE_RET) - fprintf (f, " WARNING: Fall through to a BB with no aux!\n"); - -exit: - fprintf (f, "\n"); -} - -/* Dump textual representation of HSA IL of the current function to file F. 
*/ - -void -dump_hsa_cfun (FILE *f) -{ - basic_block bb; - - if (hsa_cfun->m_global_symbols.length () > 0) - fprintf (f, "\nHSAIL in global scope\n"); - - for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++) - { - fprintf (f, " "); - dump_hsa_symbol (f, hsa_cfun->m_global_symbols[i]); - fprintf (f, "\n"); - } - - fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name); - - for (unsigned i = 0; i < hsa_cfun->m_private_variables.length (); i++) - { - fprintf (f, " "); - dump_hsa_symbol (f, hsa_cfun->m_private_variables[i]); - fprintf (f, "\n"); - } - - FOR_ALL_BB_FN (bb, cfun) - { - hsa_bb *hbb = (class hsa_bb *) bb->aux; - dump_hsa_bb (f, hbb); - } -} - -/* Dump textual representation of HSA IL instruction INSN to stderr. */ - -DEBUG_FUNCTION void -debug_hsa_insn (hsa_insn_basic *insn) -{ - dump_hsa_insn (stderr, insn); -} - -/* Dump textual representation of HSA IL in HBB to stderr. */ - -DEBUG_FUNCTION void -debug_hsa_bb (hsa_bb *hbb) -{ - dump_hsa_bb (stderr, hbb); -} - -/* Dump textual representation of HSA IL of the current function to stderr. */ - -DEBUG_FUNCTION void -debug_hsa_cfun (void) -{ - dump_hsa_cfun (stderr); -} - -/* Dump textual representation of an HSA operand to stderr. */ - -DEBUG_FUNCTION void -debug_hsa_operand (hsa_op_base *opc) -{ - dump_hsa_operand (stderr, opc, true); - fprintf (stderr, "\n"); -} - -/* Dump textual representation of as HSA symbol. */ - -DEBUG_FUNCTION void -debug_hsa_symbol (hsa_symbol *symbol) -{ - dump_hsa_symbol (stderr, symbol); - fprintf (stderr, "\n"); -} diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c deleted file mode 100644 index 2af9990..0000000 --- a/gcc/hsa-gen.c +++ /dev/null @@ -1,6694 +0,0 @@ -/* A pass for lowering gimple to HSAIL - Copyright (C) 2013-2020 Free Software Foundation, Inc. - Contributed by Martin Jambor <mjambor@suse.cz> and - Martin Liska <mliska@suse.cz>. - -This file is part of GCC. 
- -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "memmodel.h" -#include "tm.h" -#include "is-a.h" -#include "hash-table.h" -#include "vec.h" -#include "tree.h" -#include "tree-pass.h" -#include "function.h" -#include "basic-block.h" -#include "cfg.h" -#include "fold-const.h" -#include "gimple.h" -#include "gimple-iterator.h" -#include "bitmap.h" -#include "dumpfile.h" -#include "gimple-pretty-print.h" -#include "diagnostic-core.h" -#include "gimple-ssa.h" -#include "tree-phinodes.h" -#include "stringpool.h" -#include "tree-vrp.h" -#include "tree-ssanames.h" -#include "tree-dfa.h" -#include "ssa-iterators.h" -#include "cgraph.h" -#include "print-tree.h" -#include "alloc-pool.h" -#include "symbol-summary.h" -#include "hsa-common.h" -#include "cfghooks.h" -#include "tree-cfg.h" -#include "cfgloop.h" -#include "cfganal.h" -#include "builtins.h" -#include "gomp-constants.h" -#include "internal-fn.h" -#include "builtins.h" -#include "stor-layout.h" -#include "stringpool.h" -#include "attribs.h" - -/* Print a warning message and set that we have seen an error. */ - -#define HSA_SORRY_ATV(location, message, ...) 
\ - do \ - { \ - hsa_fail_cfun (); \ - auto_diagnostic_group d; \ - if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \ - HSA_SORRY_MSG)) \ - inform (location, message, __VA_ARGS__); \ - } \ - while (false) - -/* Same as previous, but highlight a location. */ - -#define HSA_SORRY_AT(location, message) \ - do \ - { \ - hsa_fail_cfun (); \ - auto_diagnostic_group d; \ - if (warning_at (EXPR_LOCATION (hsa_cfun->m_decl), OPT_Whsa, \ - HSA_SORRY_MSG)) \ - inform (location, message); \ - } \ - while (false) - -/* Default number of threads used by kernel dispatch. */ - -#define HSA_DEFAULT_NUM_THREADS 64 - -/* Following structures are defined in the final version - of HSA specification. */ - -/* HSA queue packet is shadow structure, originally provided by AMD. */ - -struct hsa_queue_packet -{ - uint16_t header; - uint16_t setup; - uint16_t workgroup_size_x; - uint16_t workgroup_size_y; - uint16_t workgroup_size_z; - uint16_t reserved0; - uint32_t grid_size_x; - uint32_t grid_size_y; - uint32_t grid_size_z; - uint32_t private_segment_size; - uint32_t group_segment_size; - uint64_t kernel_object; - void *kernarg_address; - uint64_t reserved2; - uint64_t completion_signal; -}; - -/* HSA queue is shadow structure, originally provided by AMD. */ - -struct hsa_queue -{ - int type; - uint32_t features; - void *base_address; - uint64_t doorbell_signal; - uint32_t size; - uint32_t reserved1; - uint64_t id; -}; - -static struct obstack hsa_obstack; - -/* List of pointers to all instructions that come from an object allocator. */ -static vec <hsa_insn_basic *> hsa_instructions; - -/* List of pointers to all operands that come from an object allocator. 
*/ -static vec <hsa_op_base *> hsa_operands; - -hsa_symbol::hsa_symbol () - : m_decl (NULL_TREE), m_name (NULL), m_name_number (0), - m_directive_offset (0), m_type (BRIG_TYPE_NONE), - m_segment (BRIG_SEGMENT_NONE), m_linkage (BRIG_LINKAGE_NONE), m_dim (0), - m_cst_value (NULL), m_global_scope_p (false), m_seen_error (false), - m_allocation (BRIG_ALLOCATION_AUTOMATIC), m_emitted_to_brig (false) -{ -} - - -hsa_symbol::hsa_symbol (BrigType16_t type, BrigSegment8_t segment, - BrigLinkage8_t linkage, bool global_scope_p, - BrigAllocation allocation, BrigAlignment8_t align) - : m_decl (NULL_TREE), m_name (NULL), m_name_number (0), - m_directive_offset (0), m_type (type), m_segment (segment), - m_linkage (linkage), m_dim (0), m_cst_value (NULL), - m_global_scope_p (global_scope_p), m_seen_error (false), - m_allocation (allocation), m_emitted_to_brig (false), m_align (align) -{ -} - -unsigned HOST_WIDE_INT -hsa_symbol::total_byte_size () -{ - unsigned HOST_WIDE_INT s - = hsa_type_bit_size (~BRIG_TYPE_ARRAY_MASK & m_type); - gcc_assert (s % BITS_PER_UNIT == 0); - s /= BITS_PER_UNIT; - - if (m_dim) - s *= m_dim; - - return s; -} - -/* Forward declaration. */ - -static BrigType16_t -hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p, - bool min32int); - -void -hsa_symbol::fillup_for_decl (tree decl) -{ - m_decl = decl; - m_type = hsa_type_for_tree_type (TREE_TYPE (decl), &m_dim, false); - if (hsa_seen_error ()) - { - m_seen_error = true; - return; - } - - m_align = MAX (m_align, hsa_natural_alignment (m_type)); -} - -/* Constructor of class representing global HSA function/kernel information and - state. FNDECL is function declaration, KERNEL_P is true if the function - is going to become a HSA kernel. If the function has body, SSA_NAMES_COUNT - should be set to number of SSA names used in the function. - MODIFIED_CFG is set to true in case we modified control-flow graph - of the function. 
*/ - -hsa_function_representation::hsa_function_representation - (tree fdecl, bool kernel_p, unsigned ssa_names_count, bool modified_cfg) - : m_name (NULL), - m_reg_count (0), m_input_args (vNULL), - m_output_arg (NULL), m_spill_symbols (vNULL), m_global_symbols (vNULL), - m_private_variables (vNULL), m_called_functions (vNULL), - m_called_internal_fns (vNULL), m_hbb_count (0), - m_in_ssa (true), m_kern_p (kernel_p), m_declaration_p (false), - m_decl (fdecl), m_internal_fn (NULL), m_shadow_reg (NULL), - m_kernel_dispatch_count (0), m_maximum_omp_data_size (0), - m_seen_error (false), m_temp_symbol_count (0), m_ssa_map (), - m_modified_cfg (modified_cfg) -{ - int sym_init_len = (vec_safe_length (cfun->local_decls) / 2) + 1; - m_local_symbols = new hash_table <hsa_noop_symbol_hasher> (sym_init_len); - m_ssa_map.safe_grow_cleared (ssa_names_count); -} - -/* Constructor of class representing HSA function information that - is derived for an internal function. */ -hsa_function_representation::hsa_function_representation (hsa_internal_fn *fn) - : m_reg_count (0), m_input_args (vNULL), - m_output_arg (NULL), m_local_symbols (NULL), - m_spill_symbols (vNULL), m_global_symbols (vNULL), - m_private_variables (vNULL), m_called_functions (vNULL), - m_called_internal_fns (vNULL), m_hbb_count (0), - m_in_ssa (true), m_kern_p (false), m_declaration_p (true), m_decl (NULL), - m_internal_fn (fn), m_shadow_reg (NULL), m_kernel_dispatch_count (0), - m_maximum_omp_data_size (0), m_seen_error (false), m_temp_symbol_count (0), - m_ssa_map () {} - -/* Destructor of class holding function/kernel-wide information and state. */ - -hsa_function_representation::~hsa_function_representation () -{ - /* Kernel names are deallocated at the end of BRIG output when deallocating - hsa_decl_kernel_mapping. 
*/ - if (!m_kern_p || m_seen_error) - free (m_name); - - for (unsigned i = 0; i < m_input_args.length (); i++) - delete m_input_args[i]; - m_input_args.release (); - - delete m_output_arg; - delete m_local_symbols; - - for (unsigned i = 0; i < m_spill_symbols.length (); i++) - delete m_spill_symbols[i]; - m_spill_symbols.release (); - - hsa_symbol *sym; - for (unsigned i = 0; i < m_global_symbols.iterate (i, &sym); i++) - if (sym->m_linkage != BRIG_ALLOCATION_PROGRAM) - delete sym; - m_global_symbols.release (); - - for (unsigned i = 0; i < m_private_variables.length (); i++) - delete m_private_variables[i]; - m_private_variables.release (); - m_called_functions.release (); - m_ssa_map.release (); - - for (unsigned i = 0; i < m_called_internal_fns.length (); i++) - delete m_called_internal_fns[i]; -} - -hsa_op_reg * -hsa_function_representation::get_shadow_reg () -{ - /* If we compile a function with kernel dispatch and does not set - an optimization level, the function won't be inlined and - we return NULL. */ - if (!m_kern_p) - return NULL; - - if (m_shadow_reg) - return m_shadow_reg; - - /* Append the shadow argument. 
*/ - hsa_symbol *shadow = new hsa_symbol (BRIG_TYPE_U64, BRIG_SEGMENT_KERNARG, - BRIG_LINKAGE_FUNCTION); - m_input_args.safe_push (shadow); - shadow->m_name = "hsa_runtime_shadow"; - - hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_U64); - hsa_op_address *addr = new hsa_op_address (shadow); - - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, r, addr); - hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->append_insn (mem); - m_shadow_reg = r; - - return r; -} - -bool hsa_function_representation::has_shadow_reg_p () -{ - return m_shadow_reg != NULL; -} - -void -hsa_function_representation::init_extra_bbs () -{ - hsa_init_new_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); - hsa_init_new_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); -} - -void -hsa_function_representation::update_dominance () -{ - if (m_modified_cfg) - { - free_dominance_info (CDI_DOMINATORS); - calculate_dominance_info (CDI_DOMINATORS); - } -} - -hsa_symbol * -hsa_function_representation::create_hsa_temporary (BrigType16_t type) -{ - hsa_symbol *s = new hsa_symbol (type, BRIG_SEGMENT_PRIVATE, - BRIG_LINKAGE_FUNCTION); - s->m_name_number = m_temp_symbol_count++; - - hsa_cfun->m_private_variables.safe_push (s); - return s; -} - -BrigLinkage8_t -hsa_function_representation::get_linkage () -{ - if (m_internal_fn) - return BRIG_LINKAGE_PROGRAM; - - return m_kern_p || TREE_PUBLIC (m_decl) ? - BRIG_LINKAGE_PROGRAM : BRIG_LINKAGE_MODULE; -} - -/* Hash map of simple OMP builtins. */ -static hash_map <nofree_string_hash, omp_simple_builtin> *omp_simple_builtins - = NULL; - -/* Warning messages for OMP builtins. 
*/ - -#define HSA_WARN_LOCK_ROUTINE "support for HSA does not implement OpenMP " \ - "lock routines" -#define HSA_WARN_TIMING_ROUTINE "support for HSA does not implement OpenMP " \ - "timing routines" -#define HSA_WARN_MEMORY_ROUTINE "OpenMP device memory library routines have " \ - "undefined semantics within target regions, support for HSA ignores them" -#define HSA_WARN_AFFINITY "Support for HSA does not implement OpenMP " \ - "affinity feateres" - -/* Initialize hash map with simple OMP builtins. */ - -static void -hsa_init_simple_builtins () -{ - if (omp_simple_builtins != NULL) - return; - - omp_simple_builtins - = new hash_map <nofree_string_hash, omp_simple_builtin> (); - - omp_simple_builtin omp_builtins[] = - { - omp_simple_builtin ("omp_get_initial_device", NULL, false, - new hsa_op_immed (GOMP_DEVICE_HOST, - (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_is_initial_device", NULL, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_dynamic", NULL, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_set_dynamic", NULL, false, NULL), - omp_simple_builtin ("omp_init_lock", HSA_WARN_LOCK_ROUTINE, true), - omp_simple_builtin ("omp_init_lock_with_hint", HSA_WARN_LOCK_ROUTINE, - true), - omp_simple_builtin ("omp_init_nest_lock_with_hint", HSA_WARN_LOCK_ROUTINE, - true), - omp_simple_builtin ("omp_destroy_lock", HSA_WARN_LOCK_ROUTINE, true), - omp_simple_builtin ("omp_set_lock", HSA_WARN_LOCK_ROUTINE, true), - omp_simple_builtin ("omp_unset_lock", HSA_WARN_LOCK_ROUTINE, true), - omp_simple_builtin ("omp_test_lock", HSA_WARN_LOCK_ROUTINE, true), - omp_simple_builtin ("omp_get_wtime", HSA_WARN_TIMING_ROUTINE, true), - omp_simple_builtin ("omp_get_wtick", HSA_WARN_TIMING_ROUTINE, true), - omp_simple_builtin ("omp_target_alloc", HSA_WARN_MEMORY_ROUTINE, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_U64)), - omp_simple_builtin ("omp_target_free", 
HSA_WARN_MEMORY_ROUTINE, false), - omp_simple_builtin ("omp_target_is_present", HSA_WARN_MEMORY_ROUTINE, - false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_target_memcpy", HSA_WARN_MEMORY_ROUTINE, false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_target_memcpy_rect", HSA_WARN_MEMORY_ROUTINE, - false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_target_associate_ptr", HSA_WARN_MEMORY_ROUTINE, - false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_target_disassociate_ptr", - HSA_WARN_MEMORY_ROUTINE, - false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_set_max_active_levels", - "Support for HSA only allows only one active level, " - "call to omp_set_max_active_levels will be ignored " - "in the generated HSAIL", - false, NULL), - omp_simple_builtin ("omp_get_max_active_levels", NULL, false, - new hsa_op_immed (1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_in_final", NULL, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_proc_bind", HSA_WARN_AFFINITY, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_num_places", HSA_WARN_AFFINITY, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_place_num_procs", HSA_WARN_AFFINITY, false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_place_proc_ids", HSA_WARN_AFFINITY, false, - NULL), - omp_simple_builtin ("omp_get_place_num", HSA_WARN_AFFINITY, false, - new hsa_op_immed (-1, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_partition_num_places", HSA_WARN_AFFINITY, - false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_partition_place_nums", HSA_WARN_AFFINITY, - false, NULL), - omp_simple_builtin 
("omp_set_default_device", - "omp_set_default_device has undefined semantics " - "within target regions, support for HSA ignores it", - false, NULL), - omp_simple_builtin ("omp_get_default_device", - "omp_get_default_device has undefined semantics " - "within target regions, support for HSA ignores it", - false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_num_devices", - "omp_get_num_devices has undefined semantics " - "within target regions, support for HSA ignores it", - false, - new hsa_op_immed (0, (BrigType16_t) BRIG_TYPE_S32)), - omp_simple_builtin ("omp_get_num_procs", NULL, true, NULL), - omp_simple_builtin ("omp_get_cancellation", NULL, true, NULL), - omp_simple_builtin ("omp_set_nested", NULL, true, NULL), - omp_simple_builtin ("omp_get_nested", NULL, true, NULL), - omp_simple_builtin ("omp_set_schedule", NULL, true, NULL), - omp_simple_builtin ("omp_get_schedule", NULL, true, NULL), - omp_simple_builtin ("omp_get_thread_limit", NULL, true, NULL), - omp_simple_builtin ("omp_get_team_size", NULL, true, NULL), - omp_simple_builtin ("omp_get_ancestor_thread_num", NULL, true, NULL), - omp_simple_builtin ("omp_get_max_task_priority", NULL, true, NULL) - }; - - unsigned count = sizeof (omp_builtins) / sizeof (omp_simple_builtin); - - for (unsigned i = 0; i < count; i++) - omp_simple_builtins->put (omp_builtins[i].m_name, omp_builtins[i]); -} - -/* Allocate HSA structures that we need only while generating with this. */ - -static void -hsa_init_data_for_cfun () -{ - hsa_init_compilation_unit_data (); - gcc_obstack_init (&hsa_obstack); -} - -/* Deinitialize HSA subsystem and free all allocated memory. 
*/ - -static void -hsa_deinit_data_for_cfun (void) -{ - basic_block bb; - - FOR_ALL_BB_FN (bb, cfun) - if (bb->aux) - { - hsa_bb *hbb = hsa_bb_for_bb (bb); - hbb->~hsa_bb (); - bb->aux = NULL; - } - - for (unsigned int i = 0; i < hsa_operands.length (); i++) - hsa_destroy_operand (hsa_operands[i]); - - hsa_operands.release (); - - for (unsigned i = 0; i < hsa_instructions.length (); i++) - hsa_destroy_insn (hsa_instructions[i]); - - hsa_instructions.release (); - - if (omp_simple_builtins != NULL) - { - delete omp_simple_builtins; - omp_simple_builtins = NULL; - } - - obstack_free (&hsa_obstack, NULL); - delete hsa_cfun; -} - -/* Return the type which holds addresses in the given SEGMENT. */ - -static BrigType16_t -hsa_get_segment_addr_type (BrigSegment8_t segment) -{ - switch (segment) - { - case BRIG_SEGMENT_NONE: - gcc_unreachable (); - - case BRIG_SEGMENT_FLAT: - case BRIG_SEGMENT_GLOBAL: - case BRIG_SEGMENT_READONLY: - case BRIG_SEGMENT_KERNARG: - return hsa_machine_large_p () ? BRIG_TYPE_U64 : BRIG_TYPE_U32; - - case BRIG_SEGMENT_GROUP: - case BRIG_SEGMENT_PRIVATE: - case BRIG_SEGMENT_SPILL: - case BRIG_SEGMENT_ARG: - return BRIG_TYPE_U32; - } - gcc_unreachable (); -} - -/* Return integer brig type according to provided SIZE in bytes. If SIGN - is set to true, return signed integer type. */ - -static BrigType16_t -get_integer_type_by_bytes (unsigned size, bool sign) -{ - if (sign) - switch (size) - { - case 1: - return BRIG_TYPE_S8; - case 2: - return BRIG_TYPE_S16; - case 4: - return BRIG_TYPE_S32; - case 8: - return BRIG_TYPE_S64; - default: - break; - } - else - switch (size) - { - case 1: - return BRIG_TYPE_U8; - case 2: - return BRIG_TYPE_U16; - case 4: - return BRIG_TYPE_U32; - case 8: - return BRIG_TYPE_U64; - default: - break; - } - - return 0; -} - -/* If T points to an integral type smaller than 32 bits, change it to a 32bit - equivalent and return the result. Otherwise just return the result. 
*/ - -static BrigType16_t -hsa_extend_inttype_to_32bit (BrigType16_t t) -{ - if (t == BRIG_TYPE_U8 || t == BRIG_TYPE_U16) - return BRIG_TYPE_U32; - else if (t == BRIG_TYPE_S8 || t == BRIG_TYPE_S16) - return BRIG_TYPE_S32; - return t; -} - -/* Return HSA type for tree TYPE, which has to fit into BrigType16_t. Pointers - are assumed to use flat addressing. If min32int is true, always expand - integer types to one that has at least 32 bits. */ - -static BrigType16_t -hsa_type_for_scalar_tree_type (const_tree type, bool min32int) -{ - HOST_WIDE_INT bsize; - const_tree base; - BrigType16_t res = BRIG_TYPE_NONE; - - gcc_checking_assert (TYPE_P (type)); - gcc_checking_assert (!AGGREGATE_TYPE_P (type)); - if (POINTER_TYPE_P (type)) - return hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - - if (TREE_CODE (type) == VECTOR_TYPE) - base = TREE_TYPE (type); - else if (TREE_CODE (type) == COMPLEX_TYPE) - { - base = TREE_TYPE (type); - min32int = true; - } - else - base = type; - - if (!tree_fits_uhwi_p (TYPE_SIZE (base))) - { - HSA_SORRY_ATV (EXPR_LOCATION (type), - "support for HSA does not implement huge or " - "variable-sized type %qT", type); - return res; - } - - bsize = tree_to_uhwi (TYPE_SIZE (base)); - unsigned byte_size = bsize / BITS_PER_UNIT; - if (INTEGRAL_TYPE_P (base)) - res = get_integer_type_by_bytes (byte_size, !TYPE_UNSIGNED (base)); - else if (SCALAR_FLOAT_TYPE_P (base)) - { - switch (bsize) - { - case 16: - res = BRIG_TYPE_F16; - break; - case 32: - res = BRIG_TYPE_F32; - break; - case 64: - res = BRIG_TYPE_F64; - break; - default: - break; - } - } - - if (res == BRIG_TYPE_NONE) - { - HSA_SORRY_ATV (EXPR_LOCATION (type), - "support for HSA does not implement type %qT", type); - return res; - } - - if (TREE_CODE (type) == VECTOR_TYPE) - { - HOST_WIDE_INT tsize = tree_to_uhwi (TYPE_SIZE (type)); - - if (bsize == tsize) - { - HSA_SORRY_ATV (EXPR_LOCATION (type), - "support for HSA does not implement a vector type " - "where a type and unit size are equal: %qT", 
type); - return res; - } - - switch (tsize) - { - case 32: - res |= BRIG_TYPE_PACK_32; - break; - case 64: - res |= BRIG_TYPE_PACK_64; - break; - case 128: - res |= BRIG_TYPE_PACK_128; - break; - default: - HSA_SORRY_ATV (EXPR_LOCATION (type), - "support for HSA does not implement type %qT", type); - } - } - - if (min32int) - /* Registers/immediate operands can only be 32bit or more except for - f16. */ - res = hsa_extend_inttype_to_32bit (res); - - if (TREE_CODE (type) == COMPLEX_TYPE) - { - unsigned bsize = 2 * hsa_type_bit_size (res); - res = hsa_bittype_for_bitsize (bsize); - } - - return res; -} - -/* Returns the BRIG type we need to load/store entities of TYPE. */ - -static BrigType16_t -mem_type_for_type (BrigType16_t type) -{ - /* HSA has non-intuitive constraints on load/store types. If it's - a bit-type it _must_ be B128, if it's not a bit-type it must be - 64bit max. So for loading entities of 128 bits (e.g. vectors) - we have to use B128, while for loading the rest we have to use the - input type (??? or maybe also flattened to a equally sized non-vector - unsigned type?). */ - if ((type & BRIG_TYPE_PACK_MASK) == BRIG_TYPE_PACK_128) - return BRIG_TYPE_B128; - else if (hsa_btype_p (type) || hsa_type_packed_p (type)) - { - unsigned bitsize = hsa_type_bit_size (type); - if (bitsize < 128) - return hsa_uint_for_bitsize (bitsize); - else - return hsa_bittype_for_bitsize (bitsize); - } - return type; -} - -/* Return HSA type for tree TYPE. If it cannot fit into BrigType16_t, some - kind of array will be generated, setting DIM appropriately. Otherwise, it - will be set to zero. 
*/ - -static BrigType16_t -hsa_type_for_tree_type (const_tree type, unsigned HOST_WIDE_INT *dim_p = NULL, - bool min32int = false) -{ - gcc_checking_assert (TYPE_P (type)); - if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (type))) - { - HSA_SORRY_ATV (EXPR_LOCATION (type), "support for HSA does not " - "implement huge or variable-sized type %qT", type); - return BRIG_TYPE_NONE; - } - - if (RECORD_OR_UNION_TYPE_P (type)) - { - if (dim_p) - *dim_p = tree_to_uhwi (TYPE_SIZE_UNIT (type)); - return BRIG_TYPE_U8 | BRIG_TYPE_ARRAY; - } - - if (TREE_CODE (type) == ARRAY_TYPE) - { - /* We try to be nice and use the real base-type when this is an array of - scalars and only resort to an array of bytes if the type is more - complex. */ - - unsigned HOST_WIDE_INT dim = 1; - - while (TREE_CODE (type) == ARRAY_TYPE) - { - tree domain = TYPE_DOMAIN (type); - if (!TYPE_MIN_VALUE (domain) - || !TYPE_MAX_VALUE (domain) - || !tree_fits_shwi_p (TYPE_MIN_VALUE (domain)) - || !tree_fits_shwi_p (TYPE_MAX_VALUE (domain))) - { - HSA_SORRY_ATV (EXPR_LOCATION (type), - "support for HSA does not implement array " - "%qT with unknown bounds", type); - return BRIG_TYPE_NONE; - } - HOST_WIDE_INT min = tree_to_shwi (TYPE_MIN_VALUE (domain)); - HOST_WIDE_INT max = tree_to_shwi (TYPE_MAX_VALUE (domain)); - dim = dim * (unsigned HOST_WIDE_INT) (max - min + 1); - type = TREE_TYPE (type); - } - - BrigType16_t res; - if (RECORD_OR_UNION_TYPE_P (type)) - { - dim = dim * tree_to_uhwi (TYPE_SIZE_UNIT (type)); - res = BRIG_TYPE_U8; - } - else - res = hsa_type_for_scalar_tree_type (type, false); - - if (dim_p) - *dim_p = dim; - return res | BRIG_TYPE_ARRAY; - } - - /* Scalar case: */ - if (dim_p) - *dim_p = 0; - - return hsa_type_for_scalar_tree_type (type, min32int); -} - -/* Returns true if converting from STYPE into DTYPE needs the _CVT - opcode. If false a normal _MOV is enough. 
*/ - -static bool -hsa_needs_cvt (BrigType16_t dtype, BrigType16_t stype) -{ - if (hsa_btype_p (dtype)) - return false; - - /* float <-> int conversions are real converts. */ - if (hsa_type_float_p (dtype) != hsa_type_float_p (stype)) - return true; - /* When both types have different size, then we need CVT as well. */ - if (hsa_type_bit_size (dtype) != hsa_type_bit_size (stype)) - return true; - return false; -} - -/* Return declaration name if it exists or create one from UID if it does not. - If DECL is a local variable, make UID part of its name. */ - -const char * -hsa_get_declaration_name (tree decl) -{ - if (!DECL_NAME (decl)) - { - char buf[64]; - snprintf (buf, 64, "__hsa_anon_%u", DECL_UID (decl)); - size_t len = strlen (buf); - char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1); - memcpy (copy, buf, len + 1); - return copy; - } - - tree name_tree; - if (TREE_CODE (decl) == FUNCTION_DECL - || (TREE_CODE (decl) == VAR_DECL && is_global_var (decl))) - name_tree = DECL_ASSEMBLER_NAME (decl); - else - name_tree = DECL_NAME (decl); - - const char *name = IDENTIFIER_POINTER (name_tree); - /* User-defined assembly names have prepended asterisk symbol. */ - if (name[0] == '*') - name++; - - if ((TREE_CODE (decl) == VAR_DECL) - && decl_function_context (decl)) - { - size_t len = strlen (name); - char *buf = (char *) alloca (len + 32); - snprintf (buf, len + 32, "%s_%u", name, DECL_UID (decl)); - len = strlen (buf); - char *copy = (char *) obstack_alloc (&hsa_obstack, len + 1); - memcpy (copy, buf, len + 1); - return copy; - } - else - return name; -} - -/* Lookup or create the associated hsa_symbol structure with a given VAR_DECL - or lookup the hsa_structure corresponding to a PARM_DECL. 
*/ - -static hsa_symbol * -get_symbol_for_decl (tree decl) -{ - hsa_symbol **slot; - hsa_symbol dummy (BRIG_TYPE_NONE, BRIG_SEGMENT_NONE, BRIG_LINKAGE_NONE); - - gcc_assert (TREE_CODE (decl) == PARM_DECL - || TREE_CODE (decl) == RESULT_DECL - || TREE_CODE (decl) == VAR_DECL - || TREE_CODE (decl) == CONST_DECL); - - dummy.m_decl = decl; - - bool is_in_global_vars = ((TREE_CODE (decl) == VAR_DECL) - && !decl_function_context (decl)); - - if (is_in_global_vars) - slot = hsa_global_variable_symbols->find_slot (&dummy, INSERT); - else - slot = hsa_cfun->m_local_symbols->find_slot (&dummy, INSERT); - - gcc_checking_assert (slot); - if (*slot) - { - hsa_symbol *sym = (*slot); - - /* If the symbol is problematic, mark current function also as - problematic. */ - if (sym->m_seen_error) - hsa_fail_cfun (); - - /* PR hsa/70234: If a global variable was marked to be emitted, - but HSAIL generation of a function using the variable fails, - we should retry to emit the variable in context of a different - function. - - Iterate elements whether a symbol is already in m_global_symbols - of not. */ - if (is_in_global_vars && !sym->m_emitted_to_brig) - { - for (unsigned i = 0; i < hsa_cfun->m_global_symbols.length (); i++) - if (hsa_cfun->m_global_symbols[i] == sym) - return *slot; - hsa_cfun->m_global_symbols.safe_push (sym); - } - - return *slot; - } - else - { - hsa_symbol *sym; - /* PARM_DECLs and RESULT_DECL should be already in m_local_symbols. 
*/ - gcc_assert (TREE_CODE (decl) == VAR_DECL - || TREE_CODE (decl) == CONST_DECL); - BrigAlignment8_t align = hsa_object_alignment (decl); - - if (is_in_global_vars) - { - gcc_checking_assert (TREE_CODE (decl) != CONST_DECL); - sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_GLOBAL, - BRIG_LINKAGE_PROGRAM, true, - BRIG_ALLOCATION_PROGRAM, align); - hsa_cfun->m_global_symbols.safe_push (sym); - sym->fillup_for_decl (decl); - if (sym->m_align > align) - { - sym->m_seen_error = true; - HSA_SORRY_ATV (EXPR_LOCATION (decl), - "HSA specification requires that %E is at least " - "naturally aligned", decl); - } - } - else - { - /* As generation of efficient memory copy instructions relies - on alignment greater or equal to 8 bytes, - we need to increase alignment of all aggregate types.. */ - if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) - align = MAX ((BrigAlignment8_t) BRIG_ALIGNMENT_8, align); - - BrigAllocation allocation = BRIG_ALLOCATION_AUTOMATIC; - BrigSegment8_t segment; - if (TREE_CODE (decl) == CONST_DECL) - { - segment = BRIG_SEGMENT_READONLY; - allocation = BRIG_ALLOCATION_AGENT; - } - else if (lookup_attribute ("hsa_group_segment", - DECL_ATTRIBUTES (decl))) - segment = BRIG_SEGMENT_GROUP; - else if (TREE_STATIC (decl)) - { - segment = BRIG_SEGMENT_GLOBAL; - allocation = BRIG_ALLOCATION_PROGRAM; - } - else if (lookup_attribute ("hsa_global_segment", - DECL_ATTRIBUTES (decl))) - segment = BRIG_SEGMENT_GLOBAL; - else - segment = BRIG_SEGMENT_PRIVATE; - - sym = new hsa_symbol (BRIG_TYPE_NONE, segment, BRIG_LINKAGE_FUNCTION, - false, allocation, align); - sym->fillup_for_decl (decl); - hsa_cfun->m_private_variables.safe_push (sym); - } - - sym->m_name = hsa_get_declaration_name (decl); - *slot = sym; - return sym; - } -} - -/* For a given HSA function declaration, return a host - function declaration. 
*/ - -tree -hsa_get_host_function (tree decl) -{ - hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (decl)); - gcc_assert (s->m_gpu_implementation_p); - - return s->m_bound_function ? s->m_bound_function->decl : NULL; -} - -/* Return true if function DECL has a host equivalent function. */ - -static char * -get_brig_function_name (tree decl) -{ - tree d = decl; - - hsa_function_summary *s = hsa_summaries->get (cgraph_node::get_create (d)); - if (s != NULL - && s->m_gpu_implementation_p - && s->m_bound_function) - d = s->m_bound_function->decl; - - /* IPA split can create a function that has no host equivalent. */ - if (d == NULL) - d = decl; - - char *name = xstrdup (hsa_get_declaration_name (d)); - hsa_sanitize_name (name); - - return name; -} - -/* Create a spill symbol of type TYPE. */ - -hsa_symbol * -hsa_get_spill_symbol (BrigType16_t type) -{ - hsa_symbol *sym = new hsa_symbol (type, BRIG_SEGMENT_SPILL, - BRIG_LINKAGE_FUNCTION); - hsa_cfun->m_spill_symbols.safe_push (sym); - return sym; -} - -/* Create a symbol for a read-only string constant. */ -hsa_symbol * -hsa_get_string_cst_symbol (tree string_cst) -{ - gcc_checking_assert (TREE_CODE (string_cst) == STRING_CST); - - hsa_symbol **slot = hsa_cfun->m_string_constants_map.get (string_cst); - if (slot) - return *slot; - - hsa_op_immed *cst = new hsa_op_immed (string_cst); - hsa_symbol *sym = new hsa_symbol (cst->m_type, BRIG_SEGMENT_GLOBAL, - BRIG_LINKAGE_MODULE, true, - BRIG_ALLOCATION_AGENT); - sym->m_cst_value = cst; - sym->m_dim = TREE_STRING_LENGTH (string_cst); - sym->m_name_number = hsa_cfun->m_global_symbols.length (); - - hsa_cfun->m_global_symbols.safe_push (sym); - hsa_cfun->m_string_constants_map.put (string_cst, sym); - return sym; -} - -/* Make the type of a MOV instruction larger if mandated by HSAIL rules. 
*/ - -static void -hsa_fixup_mov_insn_type (hsa_insn_basic *insn) -{ - insn->m_type = hsa_extend_inttype_to_32bit (insn->m_type); - if (insn->m_type == BRIG_TYPE_B8 || insn->m_type == BRIG_TYPE_B16) - insn->m_type = BRIG_TYPE_B32; -} - -/* Constructor of the ancestor of all operands. K is BRIG kind that identified - what the operator is. */ - -hsa_op_base::hsa_op_base (BrigKind16_t k) - : m_next (NULL), m_brig_op_offset (0), m_kind (k) -{ - hsa_operands.safe_push (this); -} - -/* Constructor of ancestor of all operands which have a type. K is BRIG kind - that identified what the operator is. T is the type of the operator. */ - -hsa_op_with_type::hsa_op_with_type (BrigKind16_t k, BrigType16_t t) - : hsa_op_base (k), m_type (t) -{ -} - -hsa_op_with_type * -hsa_op_with_type::get_in_type (BrigType16_t dtype, hsa_bb *hbb) -{ - if (m_type == dtype) - return this; - - hsa_op_reg *dest; - - if (hsa_needs_cvt (dtype, m_type)) - { - dest = new hsa_op_reg (dtype); - hbb->append_insn (new hsa_insn_cvt (dest, this)); - } - else if (is_a <hsa_op_reg *> (this)) - { - /* In the end, HSA registers do not really have types, only sizes, so if - the sizes match, we can use the register directly. */ - gcc_checking_assert (hsa_type_bit_size (dtype) - == hsa_type_bit_size (m_type)); - return this; - } - else - { - dest = new hsa_op_reg (m_type); - - hsa_insn_basic *mov = new hsa_insn_basic (2, BRIG_OPCODE_MOV, - dest->m_type, dest, this); - hsa_fixup_mov_insn_type (mov); - hbb->append_insn (mov); - /* We cannot simply for instance: 'mov_u32 $_3, 48 (s32)' because - type of the operand must be same as type of the instruction. */ - dest->m_type = dtype; - } - - return dest; -} - -/* If this operand has integer type smaller than 32 bits, extend it to 32 bits, - adding instructions to HBB if needed. 
*/ - -hsa_op_with_type * -hsa_op_with_type::extend_int_to_32bit (hsa_bb *hbb) -{ - if (m_type == BRIG_TYPE_U8 || m_type == BRIG_TYPE_U16) - return get_in_type (BRIG_TYPE_U32, hbb); - else if (m_type == BRIG_TYPE_S8 || m_type == BRIG_TYPE_S16) - return get_in_type (BRIG_TYPE_S32, hbb); - else - return this; -} - -/* Constructor of class representing HSA immediate values. TREE_VAL is the - tree representation of the immediate value. If min32int is true, - always expand integer types to one that has at least 32 bits. */ - -hsa_op_immed::hsa_op_immed (tree tree_val, bool min32int) - : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES, - hsa_type_for_tree_type (TREE_TYPE (tree_val), NULL, - min32int)) -{ - if (hsa_seen_error ()) - return; - - gcc_checking_assert ((is_gimple_min_invariant (tree_val) - && (!POINTER_TYPE_P (TREE_TYPE (tree_val)) - || TREE_CODE (tree_val) == INTEGER_CST)) - || TREE_CODE (tree_val) == CONSTRUCTOR); - m_tree_value = tree_val; - - /* Verify that all elements of a constructor are constants. */ - if (TREE_CODE (m_tree_value) == CONSTRUCTOR) - for (unsigned i = 0; i < CONSTRUCTOR_NELTS (m_tree_value); i++) - { - tree v = CONSTRUCTOR_ELT (m_tree_value, i)->value; - if (!CONSTANT_CLASS_P (v)) - { - HSA_SORRY_AT (EXPR_LOCATION (tree_val), - "HSA ctor should have only constants"); - return; - } - } -} - -/* Constructor of class representing HSA immediate values. INTEGER_VALUE is the - integer representation of the immediate value. TYPE is BRIG type. */ - -hsa_op_immed::hsa_op_immed (HOST_WIDE_INT integer_value, BrigType16_t type) - : hsa_op_with_type (BRIG_KIND_OPERAND_CONSTANT_BYTES, type), - m_tree_value (NULL) -{ - gcc_assert (hsa_type_integer_p (type)); - m_int_value = integer_value; -} - -hsa_op_immed::hsa_op_immed () - : hsa_op_with_type (BRIG_KIND_NONE, BRIG_TYPE_NONE) -{ -} - -/* New operator to allocate immediate operands from obstack. 
*/ - -void * -hsa_op_immed::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -/* Destructor. */ - -hsa_op_immed::~hsa_op_immed () -{ -} - -/* Change type of the immediate value to T. */ - -void -hsa_op_immed::set_type (BrigType16_t t) -{ - m_type = t; -} - -/* Constructor of class representing HSA registers and pseudo-registers. T is - the BRIG type of the new register. */ - -hsa_op_reg::hsa_op_reg (BrigType16_t t) - : hsa_op_with_type (BRIG_KIND_OPERAND_REGISTER, t), m_gimple_ssa (NULL_TREE), - m_def_insn (NULL), m_spill_sym (NULL), m_order (hsa_cfun->m_reg_count++), - m_lr_begin (0), m_lr_end (0), m_reg_class (0), m_hard_num (0) -{ -} - -/* New operator to allocate a register from obstack. */ - -void * -hsa_op_reg::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -/* Verify register operand. */ - -void -hsa_op_reg::verify_ssa () -{ - /* Verify that each HSA register has a definition assigned. - Exceptions are VAR_DECL and PARM_DECL that are a default - definition. */ - gcc_checking_assert (m_def_insn - || (m_gimple_ssa != NULL - && (!SSA_NAME_VAR (m_gimple_ssa) - || (TREE_CODE (SSA_NAME_VAR (m_gimple_ssa)) - != PARM_DECL)) - && SSA_NAME_IS_DEFAULT_DEF (m_gimple_ssa))); - - /* Verify that every use of the register is really present - in an instruction. 
*/ - for (unsigned i = 0; i < m_uses.length (); i++) - { - hsa_insn_basic *use = m_uses[i]; - - bool is_visited = false; - for (unsigned j = 0; j < use->operand_count (); j++) - { - hsa_op_base *u = use->get_op (j); - hsa_op_address *addr; addr = dyn_cast <hsa_op_address *> (u); - if (addr && addr->m_reg) - u = addr->m_reg; - - if (u == this) - { - bool r = !addr && use->op_output_p (j); - - if (r) - { - error ("HSA SSA name defined by instruction that is supposed " - "to be using it"); - debug_hsa_operand (this); - debug_hsa_insn (use); - internal_error ("HSA SSA verification failed"); - } - - is_visited = true; - } - } - - if (!is_visited) - { - error ("HSA SSA name not among operands of instruction that is " - "supposed to use it"); - debug_hsa_operand (this); - debug_hsa_insn (use); - internal_error ("HSA SSA verification failed"); - } - } -} - -hsa_op_address::hsa_op_address (hsa_symbol *sym, hsa_op_reg *r, - HOST_WIDE_INT offset) - : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (r), - m_imm_offset (offset) -{ -} - -hsa_op_address::hsa_op_address (hsa_symbol *sym, HOST_WIDE_INT offset) - : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (sym), m_reg (NULL), - m_imm_offset (offset) -{ -} - -hsa_op_address::hsa_op_address (hsa_op_reg *r, HOST_WIDE_INT offset) - : hsa_op_base (BRIG_KIND_OPERAND_ADDRESS), m_symbol (NULL), m_reg (r), - m_imm_offset (offset) -{ -} - -/* New operator to allocate address operands from obstack. */ - -void * -hsa_op_address::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -/* Constructor of an operand referring to HSAIL code. */ - -hsa_op_code_ref::hsa_op_code_ref () : hsa_op_base (BRIG_KIND_OPERAND_CODE_REF), - m_directive_offset (0) -{ -} - -/* Constructor of an operand representing a code list. Set it up so that it - can contain ELEMENTS number of elements. 
*/ - -hsa_op_code_list::hsa_op_code_list (unsigned elements) - : hsa_op_base (BRIG_KIND_OPERAND_CODE_LIST) -{ - m_offsets.create (1); - m_offsets.safe_grow_cleared (elements); -} - -/* New operator to allocate code list operands from obstack. */ - -void * -hsa_op_code_list::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -/* Constructor of an operand representing an operand list. - Set it up so that it can contain ELEMENTS number of elements. */ - -hsa_op_operand_list::hsa_op_operand_list (unsigned elements) - : hsa_op_base (BRIG_KIND_OPERAND_OPERAND_LIST) -{ - m_offsets.create (elements); - m_offsets.safe_grow (elements); -} - -/* New operator to allocate operand list operands from obstack. */ - -void * -hsa_op_operand_list::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -hsa_op_operand_list::~hsa_op_operand_list () -{ - m_offsets.release (); -} - - -hsa_op_reg * -hsa_function_representation::reg_for_gimple_ssa (tree ssa) -{ - hsa_op_reg *hreg; - - gcc_checking_assert (TREE_CODE (ssa) == SSA_NAME); - if (m_ssa_map[SSA_NAME_VERSION (ssa)]) - return m_ssa_map[SSA_NAME_VERSION (ssa)]; - - hreg = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (ssa), - false)); - hreg->m_gimple_ssa = ssa; - m_ssa_map[SSA_NAME_VERSION (ssa)] = hreg; - - return hreg; -} - -void -hsa_op_reg::set_definition (hsa_insn_basic *insn) -{ - if (hsa_cfun->m_in_ssa) - { - gcc_checking_assert (!m_def_insn); - m_def_insn = insn; - } - else - m_def_insn = NULL; -} - -/* Constructor of the class which is the bases of all instructions and directly - represents the most basic ones. NOPS is the number of operands that the - operand vector will contain (and which will be cleared). OP is the opcode - of the instruction. This constructor does not set type. 
*/ - -hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc) - : m_prev (NULL), - m_next (NULL), m_bb (NULL), m_opcode (opc), m_number (0), - m_type (BRIG_TYPE_NONE), m_brig_offset (0) -{ - if (nops > 0) - m_operands.safe_grow_cleared (nops); - - hsa_instructions.safe_push (this); -} - -/* Make OP the operand number INDEX of operands of this instruction. If OP is a - register or an address containing a register, then either set the definition - of the register to this instruction if it an output operand or add this - instruction to the uses if it is an input one. */ - -void -hsa_insn_basic::set_op (int index, hsa_op_base *op) -{ - /* Each address operand is always use. */ - hsa_op_address *addr = dyn_cast <hsa_op_address *> (op); - if (addr && addr->m_reg) - addr->m_reg->m_uses.safe_push (this); - else - { - hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op); - if (reg) - { - if (op_output_p (index)) - reg->set_definition (this); - else - reg->m_uses.safe_push (this); - } - } - - m_operands[index] = op; -} - -/* Get INDEX-th operand of the instruction. */ - -hsa_op_base * -hsa_insn_basic::get_op (int index) -{ - return m_operands[index]; -} - -/* Get address of INDEX-th operand of the instruction. */ - -hsa_op_base ** -hsa_insn_basic::get_op_addr (int index) -{ - return &m_operands[index]; -} - -/* Get number of operands of the instruction. */ -unsigned int -hsa_insn_basic::operand_count () -{ - return m_operands.length (); -} - -/* Constructor of the class which is the bases of all instructions and directly - represents the most basic ones. NOPS is the number of operands that the - operand vector will contain (and which will be cleared). OPC is the opcode - of the instruction, T is the type of the instruction. 
*/ - -hsa_insn_basic::hsa_insn_basic (unsigned nops, int opc, BrigType16_t t, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2, hsa_op_base *arg3) - : m_prev (NULL), m_next (NULL), m_bb (NULL), m_opcode (opc),m_number (0), - m_type (t), m_brig_offset (0) -{ - if (nops > 0) - m_operands.safe_grow_cleared (nops); - - if (arg0 != NULL) - { - gcc_checking_assert (nops >= 1); - set_op (0, arg0); - } - - if (arg1 != NULL) - { - gcc_checking_assert (nops >= 2); - set_op (1, arg1); - } - - if (arg2 != NULL) - { - gcc_checking_assert (nops >= 3); - set_op (2, arg2); - } - - if (arg3 != NULL) - { - gcc_checking_assert (nops >= 4); - set_op (3, arg3); - } - - hsa_instructions.safe_push (this); -} - -/* New operator to allocate basic instruction from obstack. */ - -void * -hsa_insn_basic::operator new (size_t size) -{ - return obstack_alloc (&hsa_obstack, size); -} - -/* Verify the instruction. */ - -void -hsa_insn_basic::verify () -{ - hsa_op_address *addr; - hsa_op_reg *reg; - - /* Iterate all register operands and verify that the instruction - is set in uses of the register. */ - for (unsigned i = 0; i < operand_count (); i++) - { - hsa_op_base *use = get_op (i); - - if ((addr = dyn_cast <hsa_op_address *> (use)) && addr->m_reg) - { - gcc_assert (addr->m_reg->m_def_insn != this); - use = addr->m_reg; - } - - if ((reg = dyn_cast <hsa_op_reg *> (use)) && !op_output_p (i)) - { - unsigned j; - for (j = 0; j < reg->m_uses.length (); j++) - { - if (reg->m_uses[j] == this) - break; - } - - if (j == reg->m_uses.length ()) - { - error ("HSA instruction uses a register but is not among " - "recorded register uses"); - debug_hsa_operand (reg); - debug_hsa_insn (this); - internal_error ("HSA instruction verification failed"); - } - } - } -} - -/* Constructor of an instruction representing a PHI node. NOPS is the number - of operands (equal to the number of predecessors). 
*/ - -hsa_insn_phi::hsa_insn_phi (unsigned nops, hsa_op_reg *dst) - : hsa_insn_basic (nops, HSA_OPCODE_PHI), m_dest (dst) -{ - dst->set_definition (this); -} - -/* Constructor of class representing instructions for control flow and - sychronization, */ - -hsa_insn_br::hsa_insn_br (unsigned nops, int opc, BrigType16_t t, - BrigWidth8_t width, hsa_op_base *arg0, - hsa_op_base *arg1, hsa_op_base *arg2, - hsa_op_base *arg3) - : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3), - m_width (width) -{ -} - -/* Constructor of class representing instruction for conditional jump, CTRL is - the control register determining whether the jump will be carried out, the - new instruction is automatically added to its uses list. */ - -hsa_insn_cbr::hsa_insn_cbr (hsa_op_reg *ctrl) - : hsa_insn_br (1, BRIG_OPCODE_CBR, BRIG_TYPE_B1, BRIG_WIDTH_1, ctrl) -{ -} - -/* Constructor of class representing instruction for switch jump, CTRL is - the index register. */ - -hsa_insn_sbr::hsa_insn_sbr (hsa_op_reg *index, unsigned jump_count) - : hsa_insn_basic (1, BRIG_OPCODE_SBR, BRIG_TYPE_B1, index), - m_width (BRIG_WIDTH_1), m_jump_table (vNULL), - m_label_code_list (new hsa_op_code_list (jump_count)) -{ -} - -/* Replace all occurrences of OLD_BB with NEW_BB in the statements - jump table. */ - -void -hsa_insn_sbr::replace_all_labels (basic_block old_bb, basic_block new_bb) -{ - for (unsigned i = 0; i < m_jump_table.length (); i++) - if (m_jump_table[i] == old_bb) - m_jump_table[i] = new_bb; -} - -hsa_insn_sbr::~hsa_insn_sbr () -{ - m_jump_table.release (); -} - -/* Constructor of comparison instruction. CMP is the comparison operation and T - is the result type. */ - -hsa_insn_cmp::hsa_insn_cmp (BrigCompareOperation8_t cmp, BrigType16_t t, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2) - : hsa_insn_basic (3 , BRIG_OPCODE_CMP, t, arg0, arg1, arg2), m_compare (cmp) -{ -} - -/* Constructor of classes representing memory accesses. 
OPC is the opcode (must - be BRIG_OPCODE_ST or BRIG_OPCODE_LD) and T is the type. The instruction - operands are provided as ARG0 and ARG1. */ - -hsa_insn_mem::hsa_insn_mem (int opc, BrigType16_t t, hsa_op_base *arg0, - hsa_op_base *arg1) - : hsa_insn_basic (2, opc, t, arg0, arg1), - m_align (hsa_natural_alignment (t)), m_equiv_class (0) -{ - gcc_checking_assert (opc == BRIG_OPCODE_LD || opc == BRIG_OPCODE_ST); -} - -/* Constructor for descendants allowing different opcodes and number of - operands, it passes its arguments directly to hsa_insn_basic - constructor. The instruction operands are provided as ARG[0-3]. */ - - -hsa_insn_mem::hsa_insn_mem (unsigned nops, int opc, BrigType16_t t, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2, hsa_op_base *arg3) - : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3), - m_align (hsa_natural_alignment (t)), m_equiv_class (0) -{ -} - -/* Constructor of class representing atomic instructions. OPC is the principal - opcode, AOP is the specific atomic operation opcode. T is the type of the - instruction. The instruction operands are provided as ARG[0-3]. */ - -hsa_insn_atomic::hsa_insn_atomic (int nops, int opc, - enum BrigAtomicOperation aop, - BrigType16_t t, BrigMemoryOrder memorder, - hsa_op_base *arg0, - hsa_op_base *arg1, hsa_op_base *arg2, - hsa_op_base *arg3) - : hsa_insn_mem (nops, opc, t, arg0, arg1, arg2, arg3), m_atomicop (aop), - m_memoryorder (memorder), - m_memoryscope (BRIG_MEMORY_SCOPE_SYSTEM) -{ - gcc_checking_assert (opc == BRIG_OPCODE_ATOMICNORET || - opc == BRIG_OPCODE_ATOMIC || - opc == BRIG_OPCODE_SIGNAL || - opc == BRIG_OPCODE_SIGNALNORET); -} - -/* Constructor of class representing signal instructions. OPC is the prinicpal - opcode, SOP is the specific signal operation opcode. T is the type of the - instruction. The instruction operands are provided as ARG[0-3]. 
*/ - -hsa_insn_signal::hsa_insn_signal (int nops, int opc, - enum BrigAtomicOperation sop, - BrigType16_t t, BrigMemoryOrder memorder, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2, hsa_op_base *arg3) - : hsa_insn_basic (nops, opc, t, arg0, arg1, arg2, arg3), - m_memory_order (memorder), m_signalop (sop) -{ -} - -/* Constructor of class representing segment conversion instructions. OPC is - the opcode which must be either BRIG_OPCODE_STOF or BRIG_OPCODE_FTOS. DEST - and SRCT are destination and source types respectively, SEG is the segment - we are converting to or from. The instruction operands are - provided as ARG0 and ARG1. */ - -hsa_insn_seg::hsa_insn_seg (int opc, BrigType16_t dest, BrigType16_t srct, - BrigSegment8_t seg, hsa_op_base *arg0, - hsa_op_base *arg1) - : hsa_insn_basic (2, opc, dest, arg0, arg1), m_src_type (srct), - m_segment (seg) -{ - gcc_checking_assert (opc == BRIG_OPCODE_STOF || opc == BRIG_OPCODE_FTOS); -} - -/* Constructor of class representing a call instruction. CALLEE is the tree - representation of the function being called. */ - -hsa_insn_call::hsa_insn_call (tree callee) - : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (callee), - m_output_arg (NULL), m_args_code_list (NULL), m_result_code_list (NULL) -{ -} - -hsa_insn_call::hsa_insn_call (hsa_internal_fn *fn) - : hsa_insn_basic (0, BRIG_OPCODE_CALL), m_called_function (NULL), - m_called_internal_fn (fn), m_output_arg (NULL), m_args_code_list (NULL), - m_result_code_list (NULL) -{ -} - -hsa_insn_call::~hsa_insn_call () -{ - for (unsigned i = 0; i < m_input_args.length (); i++) - delete m_input_args[i]; - - delete m_output_arg; - - m_input_args.release (); - m_input_arg_insns.release (); -} - -/* Constructor of class representing the argument block required to invoke - a call in HSAIL. 
*/ -hsa_insn_arg_block::hsa_insn_arg_block (BrigKind brig_kind, - hsa_insn_call * call) - : hsa_insn_basic (0, HSA_OPCODE_ARG_BLOCK), m_kind (brig_kind), - m_call_insn (call) -{ -} - -hsa_insn_comment::hsa_insn_comment (const char *s) - : hsa_insn_basic (0, BRIG_KIND_DIRECTIVE_COMMENT) -{ - unsigned l = strlen (s); - - /* Append '// ' to the string. */ - char *buf = XNEWVEC (char, l + 4); - sprintf (buf, "// %s", s); - m_comment = buf; -} - -hsa_insn_comment::~hsa_insn_comment () -{ - gcc_checking_assert (m_comment); - free (m_comment); - m_comment = NULL; -} - -/* Constructor of class representing the queue instruction in HSAIL. */ - -hsa_insn_queue::hsa_insn_queue (int nops, int opcode, BrigSegment segment, - BrigMemoryOrder memory_order, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2, hsa_op_base *arg3) - : hsa_insn_basic (nops, opcode, BRIG_TYPE_U64, arg0, arg1, arg2, arg3), - m_segment (segment), m_memory_order (memory_order) -{ -} - -/* Constructor of class representing the source type instruction in HSAIL. */ - -hsa_insn_srctype::hsa_insn_srctype (int nops, BrigOpcode opcode, - BrigType16_t destt, BrigType16_t srct, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2 = NULL) - : hsa_insn_basic (nops, opcode, destt, arg0, arg1, arg2), - m_source_type (srct) -{} - -/* Constructor of class representing the packed instruction in HSAIL. */ - -hsa_insn_packed::hsa_insn_packed (int nops, BrigOpcode opcode, - BrigType16_t destt, BrigType16_t srct, - hsa_op_base *arg0, hsa_op_base *arg1, - hsa_op_base *arg2) - : hsa_insn_srctype (nops, opcode, destt, srct, arg0, arg1, arg2) -{ - m_operand_list = new hsa_op_operand_list (nops - 1); -} - -/* Constructor of class representing the convert instruction in HSAIL. */ - -hsa_insn_cvt::hsa_insn_cvt (hsa_op_with_type *dest, hsa_op_with_type *src) - : hsa_insn_basic (2, BRIG_OPCODE_CVT, dest->m_type, dest, src) -{ -} - -/* Constructor of class representing the alloca in HSAIL. 
*/ - -hsa_insn_alloca::hsa_insn_alloca (hsa_op_with_type *dest, - hsa_op_with_type *size, unsigned alignment) - : hsa_insn_basic (2, BRIG_OPCODE_ALLOCA, dest->m_type, dest, size), - m_align (BRIG_ALIGNMENT_8) -{ - gcc_assert (dest->m_type == BRIG_TYPE_U32); - if (alignment) - m_align = hsa_alignment_encoding (alignment); -} - -/* Append an instruction INSN into the basic block. */ - -void -hsa_bb::append_insn (hsa_insn_basic *insn) -{ - gcc_assert (insn->m_opcode != 0 || insn->operand_count () == 0); - gcc_assert (!insn->m_bb); - - insn->m_bb = m_bb; - insn->m_prev = m_last_insn; - insn->m_next = NULL; - if (m_last_insn) - m_last_insn->m_next = insn; - m_last_insn = insn; - if (!m_first_insn) - m_first_insn = insn; -} - -void -hsa_bb::append_phi (hsa_insn_phi *hphi) -{ - hphi->m_bb = m_bb; - - hphi->m_prev = m_last_phi; - hphi->m_next = NULL; - if (m_last_phi) - m_last_phi->m_next = hphi; - m_last_phi = hphi; - if (!m_first_phi) - m_first_phi = hphi; -} - -/* Insert HSA instruction NEW_INSN immediately before an existing instruction - OLD_INSN. */ - -static void -hsa_insert_insn_before (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn) -{ - hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb); - - if (hbb->m_first_insn == old_insn) - hbb->m_first_insn = new_insn; - new_insn->m_prev = old_insn->m_prev; - new_insn->m_next = old_insn; - if (old_insn->m_prev) - old_insn->m_prev->m_next = new_insn; - old_insn->m_prev = new_insn; -} - -/* Append HSA instruction NEW_INSN immediately after an existing instruction - OLD_INSN. 
*/ - -static void -hsa_append_insn_after (hsa_insn_basic *new_insn, hsa_insn_basic *old_insn) -{ - hsa_bb *hbb = hsa_bb_for_bb (old_insn->m_bb); - - if (hbb->m_last_insn == old_insn) - hbb->m_last_insn = new_insn; - new_insn->m_prev = old_insn; - new_insn->m_next = old_insn->m_next; - if (old_insn->m_next) - old_insn->m_next->m_prev = new_insn; - old_insn->m_next = new_insn; -} - -/* Return a register containing the calculated value of EXP which must be an - expression consisting of PLUS_EXPRs, MULT_EXPRs, NOP_EXPRs, SSA_NAMEs and - integer constants as returned by get_inner_reference. - Newly generated HSA instructions will be appended to HBB. - Perform all calculations in ADDRTYPE. */ - -static hsa_op_with_type * -gen_address_calculation (tree exp, hsa_bb *hbb, BrigType16_t addrtype) -{ - int opcode; - - if (TREE_CODE (exp) == NOP_EXPR) - exp = TREE_OPERAND (exp, 0); - - switch (TREE_CODE (exp)) - { - case SSA_NAME: - return hsa_cfun->reg_for_gimple_ssa (exp)->get_in_type (addrtype, hbb); - - case INTEGER_CST: - { - hsa_op_immed *imm = new hsa_op_immed (exp); - if (addrtype != imm->m_type) - imm->m_type = addrtype; - return imm; - } - - case PLUS_EXPR: - opcode = BRIG_OPCODE_ADD; - break; - - case MULT_EXPR: - opcode = BRIG_OPCODE_MUL; - break; - - default: - gcc_unreachable (); - } - - hsa_op_reg *res = new hsa_op_reg (addrtype); - hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, addrtype); - insn->set_op (0, res); - - hsa_op_with_type *op1 = gen_address_calculation (TREE_OPERAND (exp, 0), hbb, - addrtype); - hsa_op_with_type *op2 = gen_address_calculation (TREE_OPERAND (exp, 1), hbb, - addrtype); - insn->set_op (1, op1); - insn->set_op (2, op2); - - hbb->append_insn (insn); - return res; -} - -/* If R1 is NULL, just return R2, otherwise append an instruction adding them - to HBB and return the register holding the result. 
*/ - -static hsa_op_reg * -add_addr_regs_if_needed (hsa_op_reg *r1, hsa_op_reg *r2, hsa_bb *hbb) -{ - gcc_checking_assert (r2); - if (!r1) - return r2; - - hsa_op_reg *res = new hsa_op_reg (r1->m_type); - gcc_assert (!hsa_needs_cvt (r1->m_type, r2->m_type)); - hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_ADD, res->m_type); - insn->set_op (0, res); - insn->set_op (1, r1); - insn->set_op (2, r2); - hbb->append_insn (insn); - return res; -} - -/* Helper of gen_hsa_addr. Update *SYMBOL, *ADDRTYPE, *REG and *OFFSET to - reflect BASE which is the first operand of a MEM_REF or a TARGET_MEM_REF. */ - -static void -process_mem_base (tree base, hsa_symbol **symbol, BrigType16_t *addrtype, - hsa_op_reg **reg, offset_int *offset, hsa_bb *hbb) -{ - if (TREE_CODE (base) == SSA_NAME) - { - gcc_assert (!*reg); - hsa_op_with_type *ssa - = hsa_cfun->reg_for_gimple_ssa (base)->get_in_type (*addrtype, hbb); - *reg = dyn_cast <hsa_op_reg *> (ssa); - } - else if (TREE_CODE (base) == ADDR_EXPR) - { - tree decl = TREE_OPERAND (base, 0); - - if (!DECL_P (decl) || TREE_CODE (decl) == FUNCTION_DECL) - { - HSA_SORRY_AT (EXPR_LOCATION (base), - "support for HSA does not implement a memory reference " - "to a non-declaration type"); - return; - } - - gcc_assert (!*symbol); - - *symbol = get_symbol_for_decl (decl); - *addrtype = hsa_get_segment_addr_type ((*symbol)->m_segment); - } - else if (TREE_CODE (base) == INTEGER_CST) - *offset += wi::to_offset (base); - else - gcc_unreachable (); -} - -/* Forward declaration of a function. */ - -static void -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb); - -/* Generate HSA address operand for a given tree memory reference REF. If - instructions need to be created to calculate the address, they will be added - to the end of HBB. If a caller provider OUTPUT_BITSIZE and OUTPUT_BITPOS, - the function assumes that the caller will handle possible - bit-field references. 
Otherwise if we reference a bit-field, sorry message - is displayed. */ - -static hsa_op_address * -gen_hsa_addr (tree ref, hsa_bb *hbb, HOST_WIDE_INT *output_bitsize = NULL, - HOST_WIDE_INT *output_bitpos = NULL) -{ - hsa_symbol *symbol = NULL; - hsa_op_reg *reg = NULL; - offset_int offset = 0; - tree origref = ref; - tree varoffset = NULL_TREE; - BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - HOST_WIDE_INT bitsize = 0, bitpos = 0; - BrigType16_t flat_addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - - if (TREE_CODE (ref) == STRING_CST) - { - symbol = hsa_get_string_cst_symbol (ref); - goto out; - } - else if (TREE_CODE (ref) == BIT_FIELD_REF - && (!multiple_p (bit_field_size (ref), BITS_PER_UNIT) - || !multiple_p (bit_field_offset (ref), BITS_PER_UNIT))) - { - HSA_SORRY_ATV (EXPR_LOCATION (origref), - "support for HSA does not implement " - "bit field references such as %E", ref); - goto out; - } - - if (handled_component_p (ref)) - { - machine_mode mode; - int unsignedp, volatilep, preversep; - poly_int64 pbitsize, pbitpos; - tree new_ref; - - new_ref = get_inner_reference (ref, &pbitsize, &pbitpos, &varoffset, - &mode, &unsignedp, &preversep, - &volatilep); - /* When this isn't true, the switch below will report an - appropriate error. 
*/ - if (pbitsize.is_constant () && pbitpos.is_constant ()) - { - bitsize = pbitsize.to_constant (); - bitpos = pbitpos.to_constant (); - ref = new_ref; - offset = bitpos; - offset = wi::rshift (offset, LOG2_BITS_PER_UNIT, SIGNED); - } - } - - switch (TREE_CODE (ref)) - { - case ADDR_EXPR: - { - addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE); - symbol = hsa_cfun->create_hsa_temporary (flat_addrtype); - hsa_op_reg *r = new hsa_op_reg (flat_addrtype); - gen_hsa_addr_insns (ref, r, hbb); - hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type, - r, new hsa_op_address (symbol))); - - break; - } - case SSA_NAME: - { - addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE); - hsa_op_with_type *r = hsa_cfun->reg_for_gimple_ssa (ref); - if (r->m_type == BRIG_TYPE_B1) - r = r->get_in_type (BRIG_TYPE_U32, hbb); - symbol = hsa_cfun->create_hsa_temporary (r->m_type); - - hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, r->m_type, - r, new hsa_op_address (symbol))); - - break; - } - case PARM_DECL: - case VAR_DECL: - case RESULT_DECL: - case CONST_DECL: - gcc_assert (!symbol); - symbol = get_symbol_for_decl (ref); - addrtype = hsa_get_segment_addr_type (symbol->m_segment); - break; - - case MEM_REF: - process_mem_base (TREE_OPERAND (ref, 0), &symbol, &addrtype, ®, - &offset, hbb); - - if (!integer_zerop (TREE_OPERAND (ref, 1))) - offset += wi::to_offset (TREE_OPERAND (ref, 1)); - break; - - case TARGET_MEM_REF: - process_mem_base (TMR_BASE (ref), &symbol, &addrtype, ®, &offset, hbb); - if (TMR_INDEX (ref)) - { - hsa_op_reg *disp1; - hsa_op_base *idx = hsa_cfun->reg_for_gimple_ssa - (TMR_INDEX (ref))->get_in_type (addrtype, hbb); - if (TMR_STEP (ref) && !integer_onep (TMR_STEP (ref))) - { - disp1 = new hsa_op_reg (addrtype); - hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_MUL, - addrtype); - - /* As step must respect addrtype, we overwrite the type - of an immediate value. 
*/ - hsa_op_immed *step = new hsa_op_immed (TMR_STEP (ref)); - step->m_type = addrtype; - - insn->set_op (0, disp1); - insn->set_op (1, idx); - insn->set_op (2, step); - hbb->append_insn (insn); - } - else - disp1 = as_a <hsa_op_reg *> (idx); - reg = add_addr_regs_if_needed (reg, disp1, hbb); - } - if (TMR_INDEX2 (ref)) - { - if (TREE_CODE (TMR_INDEX2 (ref)) == SSA_NAME) - { - hsa_op_base *disp2 = hsa_cfun->reg_for_gimple_ssa - (TMR_INDEX2 (ref))->get_in_type (addrtype, hbb); - reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (disp2), - hbb); - } - else if (TREE_CODE (TMR_INDEX2 (ref)) == INTEGER_CST) - offset += wi::to_offset (TMR_INDEX2 (ref)); - else - gcc_unreachable (); - } - offset += wi::to_offset (TMR_OFFSET (ref)); - break; - case FUNCTION_DECL: - HSA_SORRY_AT (EXPR_LOCATION (origref), - "support for HSA does not implement function pointers"); - goto out; - default: - HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does " - "not implement memory access to %E", origref); - goto out; - } - - if (varoffset) - { - if (TREE_CODE (varoffset) == INTEGER_CST) - offset += wi::to_offset (varoffset); - else - { - hsa_op_base *off_op = gen_address_calculation (varoffset, hbb, - addrtype); - reg = add_addr_regs_if_needed (reg, as_a <hsa_op_reg *> (off_op), - hbb); - } - } - - gcc_checking_assert ((symbol - && addrtype - == hsa_get_segment_addr_type (symbol->m_segment)) - || (!symbol - && addrtype - == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT))); -out: - HOST_WIDE_INT hwi_offset = offset.to_shwi (); - - /* Calculate remaining bitsize offset (if presented). */ - bitpos %= BITS_PER_UNIT; - /* If bitsize is a power of two that is greater or equal to BITS_PER_UNIT, it - is not a reason to think this is a bit-field access. 
*/ - if (bitpos == 0 - && (bitsize >= BITS_PER_UNIT) - && !(bitsize & (bitsize - 1))) - bitsize = 0; - - if ((bitpos || bitsize) && (output_bitpos == NULL || output_bitsize == NULL)) - HSA_SORRY_ATV (EXPR_LOCATION (origref), "support for HSA does not " - "implement unhandled bit field reference such as %E", ref); - - if (output_bitsize != NULL && output_bitpos != NULL) - { - *output_bitsize = bitsize; - *output_bitpos = bitpos; - } - - return new hsa_op_address (symbol, reg, hwi_offset); -} - -/* Generate HSA address operand for a given tree memory reference REF. If - instructions need to be created to calculate the address, they will be added - to the end of HBB. OUTPUT_ALIGN is alignment of the created address. */ - -static hsa_op_address * -gen_hsa_addr_with_align (tree ref, hsa_bb *hbb, BrigAlignment8_t *output_align) -{ - hsa_op_address *addr = gen_hsa_addr (ref, hbb); - if (addr->m_reg || !addr->m_symbol) - *output_align = hsa_object_alignment (ref); - else - { - /* If the address consists only of a symbol and an offset, we - compute the alignment ourselves to take into account any alignment - promotions we might have done for the HSA symbol representation. */ - unsigned align = hsa_byte_alignment (addr->m_symbol->m_align); - unsigned misalign = addr->m_imm_offset & (align - 1); - if (misalign) - align = least_bit_hwi (misalign); - *output_align = hsa_alignment_encoding (BITS_PER_UNIT * align); - } - return addr; -} - -/* Generate HSA address for a function call argument of given TYPE. - INDEX is used to generate corresponding name of the arguments. - Special value -1 represents fact that result value is created. */ - -static hsa_op_address * -gen_hsa_addr_for_arg (tree tree_type, int index) -{ - hsa_symbol *sym = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, - BRIG_LINKAGE_ARG); - sym->m_type = hsa_type_for_tree_type (tree_type, &sym->m_dim); - - if (index == -1) /* Function result. */ - sym->m_name = "res"; - else /* Function call arguments. 
*/ - { - sym->m_name = NULL; - sym->m_name_number = index; - } - - return new hsa_op_address (sym); -} - -/* Generate HSA instructions that process all necessary conversions - of an ADDR to flat addressing and place the result into DEST. - Instructions are appended to HBB. */ - -static void -convert_addr_to_flat_segment (hsa_op_address *addr, hsa_op_reg *dest, - hsa_bb *hbb) -{ - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_LDA); - insn->set_op (1, addr); - if (addr->m_symbol && addr->m_symbol->m_segment != BRIG_SEGMENT_GLOBAL) - { - /* LDA produces segment-relative address, we need to convert - it to the flat one. */ - hsa_op_reg *tmp; - tmp = new hsa_op_reg (hsa_get_segment_addr_type - (addr->m_symbol->m_segment)); - hsa_insn_seg *seg; - seg = new hsa_insn_seg (BRIG_OPCODE_STOF, - hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT), - tmp->m_type, addr->m_symbol->m_segment, dest, - tmp); - - insn->set_op (0, tmp); - insn->m_type = tmp->m_type; - hbb->append_insn (insn); - hbb->append_insn (seg); - } - else - { - insn->set_op (0, dest); - insn->m_type = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - hbb->append_insn (insn); - } -} - -/* Generate HSA instructions that calculate address of VAL including all - necessary conversions to flat addressing and place the result into DEST. - Instructions are appended to HBB. */ - -static void -gen_hsa_addr_insns (tree val, hsa_op_reg *dest, hsa_bb *hbb) -{ - /* Handle cases like tmp = NULL, where we just emit a move instruction - to a register. 
*/ - if (TREE_CODE (val) == INTEGER_CST) - { - hsa_op_immed *c = new hsa_op_immed (val); - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, - dest->m_type, dest, c); - hbb->append_insn (insn); - return; - } - - hsa_op_address *addr; - - gcc_assert (dest->m_type == hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)); - if (TREE_CODE (val) == ADDR_EXPR) - val = TREE_OPERAND (val, 0); - addr = gen_hsa_addr (val, hbb); - - if (TREE_CODE (val) == CONST_DECL - && is_gimple_reg_type (TREE_TYPE (val))) - { - gcc_assert (addr->m_symbol - && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY); - /* CONST_DECLs are in readonly segment which however does not have - addresses convertible to flat segments. So copy it to a private one - and take address of that. */ - BrigType16_t csttype - = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (val), - false)); - hsa_op_reg *r = new hsa_op_reg (csttype); - hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, csttype, r, - new hsa_op_address (addr->m_symbol))); - hsa_symbol *copysym = hsa_cfun->create_hsa_temporary (csttype); - hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_ST, csttype, r, - new hsa_op_address (copysym))); - addr->m_symbol = copysym; - } - else if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_READONLY) - { - HSA_SORRY_ATV (EXPR_LOCATION (val), "support for HSA does " - "not implement taking addresses of complex " - "%<CONST_DECL%> such as %E", val); - return; - } - - - convert_addr_to_flat_segment (addr, dest, hbb); -} - -/* Return an HSA register or HSA immediate value operand corresponding to - gimple operand OP. 
*/ - -static hsa_op_with_type * -hsa_reg_or_immed_for_gimple_op (tree op, hsa_bb *hbb) -{ - hsa_op_reg *tmp; - - if (TREE_CODE (op) == SSA_NAME) - tmp = hsa_cfun->reg_for_gimple_ssa (op); - else if (!POINTER_TYPE_P (TREE_TYPE (op))) - return new hsa_op_immed (op); - else - { - tmp = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)); - gen_hsa_addr_insns (op, tmp, hbb); - } - return tmp; -} - -/* Create a simple movement instruction with register destination DEST and - register or immediate source SRC and append it to the end of HBB. */ - -void -hsa_build_append_simple_mov (hsa_op_reg *dest, hsa_op_base *src, hsa_bb *hbb) -{ - /* Moves of packed data between registers need to adhere to the same type - rules like when dealing with memory. */ - BrigType16_t tp = mem_type_for_type (dest->m_type); - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, tp, dest, src); - hsa_fixup_mov_insn_type (insn); - unsigned dest_size = hsa_type_bit_size (dest->m_type); - if (hsa_op_reg *sreg = dyn_cast <hsa_op_reg *> (src)) - gcc_assert (dest_size == hsa_type_bit_size (sreg->m_type)); - else - { - unsigned imm_size - = hsa_type_bit_size (as_a <hsa_op_immed *> (src)->m_type); - gcc_assert ((dest_size == imm_size) - /* Eventually < 32bit registers will be promoted to 32bit. */ - || (dest_size < 32 && imm_size == 32)); - } - hbb->append_insn (insn); -} - -/* Generate HSAIL instructions loading a bit field into register DEST. - VALUE_REG is a register of a SSA name that is used in the bit field - reference. To identify a bit field BITPOS is offset to the loaded memory - and BITSIZE is number of bits of the bit field. - Add instructions to HBB. 
*/ - -static void -gen_hsa_insns_for_bitfield (hsa_op_reg *dest, hsa_op_reg *value_reg, - HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, - hsa_bb *hbb) -{ - unsigned type_bitsize - = hsa_type_bit_size (hsa_extend_inttype_to_32bit (dest->m_type)); - unsigned left_shift = type_bitsize - (bitsize + bitpos); - unsigned right_shift = left_shift + bitpos; - - if (left_shift) - { - hsa_op_reg *value_reg_2 - = new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type)); - hsa_op_immed *c = new hsa_op_immed (left_shift, BRIG_TYPE_U32); - - hsa_insn_basic *lshift - = new hsa_insn_basic (3, BRIG_OPCODE_SHL, value_reg_2->m_type, - value_reg_2, value_reg, c); - - hbb->append_insn (lshift); - - value_reg = value_reg_2; - } - - if (right_shift) - { - hsa_op_reg *value_reg_2 - = new hsa_op_reg (hsa_extend_inttype_to_32bit (dest->m_type)); - hsa_op_immed *c = new hsa_op_immed (right_shift, BRIG_TYPE_U32); - - hsa_insn_basic *rshift - = new hsa_insn_basic (3, BRIG_OPCODE_SHR, value_reg_2->m_type, - value_reg_2, value_reg, c); - - hbb->append_insn (rshift); - - value_reg = value_reg_2; - } - - hsa_insn_basic *assignment - = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, NULL, value_reg); - hsa_fixup_mov_insn_type (assignment); - hbb->append_insn (assignment); - assignment->set_output_in_type (dest, 0, hbb); -} - - -/* Generate HSAIL instructions loading a bit field into register DEST. ADDR is - prepared memory address which is used to load the bit field. To identify a - bit field BITPOS is offset to the loaded memory and BITSIZE is number of - bits of the bit field. Add instructions to HBB. Load must be performed in - alignment ALIGN. 
*/ - -static void -gen_hsa_insns_for_bitfield_load (hsa_op_reg *dest, hsa_op_address *addr, - HOST_WIDE_INT bitsize, HOST_WIDE_INT bitpos, - hsa_bb *hbb, BrigAlignment8_t align) -{ - hsa_op_reg *value_reg = new hsa_op_reg (dest->m_type); - hsa_insn_mem *mem - = new hsa_insn_mem (BRIG_OPCODE_LD, - hsa_extend_inttype_to_32bit (dest->m_type), - value_reg, addr); - mem->set_align (align); - hbb->append_insn (mem); - gen_hsa_insns_for_bitfield (dest, value_reg, bitsize, bitpos, hbb); -} - -/* Return the alignment of base memory accesses we issue to perform bit-field - memory access REF. */ - -static BrigAlignment8_t -hsa_bitmemref_alignment (tree ref) -{ - unsigned HOST_WIDE_INT bit_offset = 0; - - while (true) - { - if (TREE_CODE (ref) == BIT_FIELD_REF) - { - if (!tree_fits_uhwi_p (TREE_OPERAND (ref, 2))) - return BRIG_ALIGNMENT_1; - bit_offset += tree_to_uhwi (TREE_OPERAND (ref, 2)); - } - else if (TREE_CODE (ref) == COMPONENT_REF - && DECL_BIT_FIELD (TREE_OPERAND (ref, 1))) - bit_offset += int_bit_position (TREE_OPERAND (ref, 1)); - else - break; - ref = TREE_OPERAND (ref, 0); - } - - unsigned HOST_WIDE_INT bits = bit_offset % BITS_PER_UNIT; - unsigned HOST_WIDE_INT byte_bits = bit_offset - bits; - BrigAlignment8_t base = hsa_object_alignment (ref); - if (byte_bits == 0) - return base; - return MIN (base, hsa_alignment_encoding (least_bit_hwi (byte_bits))); -} - -/* Generate HSAIL instructions loading something into register DEST. RHS is - tree representation of the loaded data, which are loaded as type TYPE. Add - instructions to HBB. */ - -static void -gen_hsa_insns_for_load (hsa_op_reg *dest, tree rhs, tree type, hsa_bb *hbb) -{ - /* The destination SSA name will give us the type. 
*/ - if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR) - rhs = TREE_OPERAND (rhs, 0); - - if (TREE_CODE (rhs) == SSA_NAME) - { - hsa_op_reg *src = hsa_cfun->reg_for_gimple_ssa (rhs); - hsa_build_append_simple_mov (dest, src, hbb); - } - else if (is_gimple_min_invariant (rhs) - || TREE_CODE (rhs) == ADDR_EXPR) - { - if (POINTER_TYPE_P (TREE_TYPE (rhs))) - { - if (dest->m_type != hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)) - { - HSA_SORRY_ATV (EXPR_LOCATION (rhs), - "support for HSA does not implement conversion " - "of %E to the requested non-pointer type", rhs); - return; - } - - gen_hsa_addr_insns (rhs, dest, hbb); - } - else if (TREE_CODE (rhs) == COMPLEX_CST) - { - hsa_op_immed *real_part = new hsa_op_immed (TREE_REALPART (rhs)); - hsa_op_immed *imag_part = new hsa_op_immed (TREE_IMAGPART (rhs)); - - hsa_op_reg *real_part_reg - = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type), - true)); - hsa_op_reg *imag_part_reg - = new hsa_op_reg (hsa_type_for_scalar_tree_type (TREE_TYPE (type), - true)); - - hsa_build_append_simple_mov (real_part_reg, real_part, hbb); - hsa_build_append_simple_mov (imag_part_reg, imag_part, hbb); - - BrigType16_t src_type = hsa_bittype_for_type (real_part_reg->m_type); - - hsa_insn_packed *insn - = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type, - src_type, dest, real_part_reg, - imag_part_reg); - hbb->append_insn (insn); - } - else - { - hsa_op_immed *imm = new hsa_op_immed (rhs); - hsa_build_append_simple_mov (dest, imm, hbb); - } - } - else if (TREE_CODE (rhs) == REALPART_EXPR || TREE_CODE (rhs) == IMAGPART_EXPR) - { - tree pack_type = TREE_TYPE (TREE_OPERAND (rhs, 0)); - - hsa_op_reg *packed_reg - = new hsa_op_reg (hsa_type_for_scalar_tree_type (pack_type, true)); - - tree complex_rhs = TREE_OPERAND (rhs, 0); - gen_hsa_insns_for_load (packed_reg, complex_rhs, TREE_TYPE (complex_rhs), - hbb); - - hsa_op_reg *real_reg - = new hsa_op_reg (hsa_type_for_scalar_tree_type (type, true)); - - hsa_op_reg *imag_reg - = new 
hsa_op_reg (hsa_type_for_scalar_tree_type (type, true)); - - BrigKind16_t brig_type = packed_reg->m_type; - hsa_insn_packed *packed - = new hsa_insn_packed (3, BRIG_OPCODE_EXPAND, - hsa_bittype_for_type (real_reg->m_type), - brig_type, real_reg, imag_reg, packed_reg); - - hbb->append_insn (packed); - - hsa_op_reg *source = TREE_CODE (rhs) == REALPART_EXPR ? - real_reg : imag_reg; - - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, - dest->m_type, NULL, source); - hsa_fixup_mov_insn_type (insn); - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); - } - else if (TREE_CODE (rhs) == BIT_FIELD_REF - && TREE_CODE (TREE_OPERAND (rhs, 0)) == SSA_NAME) - { - tree ssa_name = TREE_OPERAND (rhs, 0); - HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs, 1)); - HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs, 2)); - - hsa_op_reg *imm_value = hsa_cfun->reg_for_gimple_ssa (ssa_name); - gen_hsa_insns_for_bitfield (dest, imm_value, bitsize, bitpos, hbb); - } - else if (DECL_P (rhs) || TREE_CODE (rhs) == MEM_REF - || TREE_CODE (rhs) == TARGET_MEM_REF - || handled_component_p (rhs)) - { - HOST_WIDE_INT bitsize, bitpos; - - /* Load from memory. */ - hsa_op_address *addr; - addr = gen_hsa_addr (rhs, hbb, &bitsize, &bitpos); - - /* Handle load of a bit field. */ - if (bitsize > 64) - { - HSA_SORRY_AT (EXPR_LOCATION (rhs), - "support for HSA does not implement load from a bit " - "field bigger than 64 bits"); - return; - } - - if (bitsize || bitpos) - gen_hsa_insns_for_bitfield_load (dest, addr, bitsize, bitpos, hbb, - hsa_bitmemref_alignment (rhs)); - else - { - BrigType16_t mtype; - /* Not dest->m_type, that's possibly extended. 
*/ - mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (type, - false)); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dest, - addr); - mem->set_align (hsa_object_alignment (rhs)); - hbb->append_insn (mem); - } - } - else - HSA_SORRY_ATV (EXPR_LOCATION (rhs), - "support for HSA does not implement loading " - "of expression %E", - rhs); -} - -/* Return number of bits necessary for representation of a bit field, - starting at BITPOS with size of BITSIZE. */ - -static unsigned -get_bitfield_size (unsigned bitpos, unsigned bitsize) -{ - unsigned s = bitpos + bitsize; - unsigned sizes[] = {8, 16, 32, 64}; - - for (unsigned i = 0; i < 4; i++) - if (s <= sizes[i]) - return sizes[i]; - - gcc_unreachable (); - return 0; -} - -/* Generate HSAIL instructions storing into memory. LHS is the destination of - the store, SRC is the source operand. Add instructions to HBB. */ - -static void -gen_hsa_insns_for_store (tree lhs, hsa_op_base *src, hsa_bb *hbb) -{ - HOST_WIDE_INT bitsize = 0, bitpos = 0; - BrigAlignment8_t req_align; - BrigType16_t mtype; - mtype = mem_type_for_type (hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), - false)); - hsa_op_address *addr; - addr = gen_hsa_addr (lhs, hbb, &bitsize, &bitpos); - - /* Handle store to a bit field. */ - if (bitsize > 64) - { - HSA_SORRY_AT (EXPR_LOCATION (lhs), - "support for HSA does not implement store to a bit field " - "bigger than 64 bits"); - return; - } - - unsigned type_bitsize = get_bitfield_size (bitpos, bitsize); - - /* HSAIL does not support MOV insn with 16-bits integers. 
*/ - if (type_bitsize < 32) - type_bitsize = 32; - - if (bitpos || (bitsize && type_bitsize != bitsize)) - { - unsigned HOST_WIDE_INT mask = 0; - BrigType16_t mem_type - = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, - !TYPE_UNSIGNED (TREE_TYPE (lhs))); - - for (unsigned i = 0; i < type_bitsize; i++) - if (i < bitpos || i >= bitpos + bitsize) - mask |= ((unsigned HOST_WIDE_INT)1 << i); - - hsa_op_reg *value_reg = new hsa_op_reg (mem_type); - - req_align = hsa_bitmemref_alignment (lhs); - /* Load value from memory. */ - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mem_type, - value_reg, addr); - mem->set_align (req_align); - hbb->append_insn (mem); - - /* AND the loaded value with prepared mask. */ - hsa_op_reg *cleared_reg = new hsa_op_reg (mem_type); - - BrigType16_t t - = get_integer_type_by_bytes (type_bitsize / BITS_PER_UNIT, false); - hsa_op_immed *c = new hsa_op_immed (mask, t); - - hsa_insn_basic *clearing - = new hsa_insn_basic (3, BRIG_OPCODE_AND, mem_type, cleared_reg, - value_reg, c); - hbb->append_insn (clearing); - - /* Shift to left a value that is going to be stored. */ - hsa_op_reg *new_value_reg = new hsa_op_reg (mem_type); - - hsa_insn_basic *basic = new hsa_insn_basic (2, BRIG_OPCODE_MOV, mem_type, - new_value_reg, src); - hsa_fixup_mov_insn_type (basic); - hbb->append_insn (basic); - - if (bitpos) - { - hsa_op_reg *shifted_value_reg = new hsa_op_reg (mem_type); - c = new hsa_op_immed (bitpos, BRIG_TYPE_U32); - - hsa_insn_basic *basic - = new hsa_insn_basic (3, BRIG_OPCODE_SHL, mem_type, - shifted_value_reg, new_value_reg, c); - hbb->append_insn (basic); - - new_value_reg = shifted_value_reg; - } - - /* OR the prepared value with prepared chunk loaded from memory. 
*/ - hsa_op_reg *prepared_reg= new hsa_op_reg (mem_type); - basic = new hsa_insn_basic (3, BRIG_OPCODE_OR, mem_type, prepared_reg, - new_value_reg, cleared_reg); - hbb->append_insn (basic); - - src = prepared_reg; - mtype = mem_type; - } - else - req_align = hsa_object_alignment (lhs); - - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, addr); - mem->set_align (req_align); - - /* The HSAIL verifier has another constraint: if the source is an immediate - then it must match the destination type. If it's a register the low bits - will be used for sub-word stores. We're always allocating new operands so - we can modify the above in place. */ - if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (src)) - { - if (!hsa_type_packed_p (imm->m_type)) - imm->m_type = mem->m_type; - else - { - /* ...and all vector immediates apparently need to be vectors of - unsigned bytes. */ - unsigned bs = hsa_type_bit_size (imm->m_type); - gcc_assert (bs == hsa_type_bit_size (mem->m_type)); - switch (bs) - { - case 32: - imm->m_type = BRIG_TYPE_U8X4; - break; - case 64: - imm->m_type = BRIG_TYPE_U8X8; - break; - case 128: - imm->m_type = BRIG_TYPE_U8X16; - break; - default: - gcc_unreachable (); - } - } - } - - hbb->append_insn (mem); -} - -/* Generate memory copy instructions that are going to be used - for copying a SRC memory to TARGET memory, - represented by pointer in a register. MIN_ALIGN is minimal alignment - of provided HSA addresses. 
*/ - -static void -gen_hsa_memory_copy (hsa_bb *hbb, hsa_op_address *target, hsa_op_address *src, - unsigned size, BrigAlignment8_t min_align) -{ - hsa_op_address *addr; - hsa_insn_mem *mem; - - unsigned offset = 0; - unsigned min_byte_align = hsa_byte_alignment (min_align); - - while (size) - { - unsigned s; - if (size >= 8) - s = 8; - else if (size >= 4) - s = 4; - else if (size >= 2) - s = 2; - else - s = 1; - - if (s > min_byte_align) - s = min_byte_align; - - BrigType16_t t = get_integer_type_by_bytes (s, false); - - hsa_op_reg *tmp = new hsa_op_reg (t); - addr = new hsa_op_address (src->m_symbol, src->m_reg, - src->m_imm_offset + offset); - mem = new hsa_insn_mem (BRIG_OPCODE_LD, t, tmp, addr); - hbb->append_insn (mem); - - addr = new hsa_op_address (target->m_symbol, target->m_reg, - target->m_imm_offset + offset); - mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, tmp, addr); - hbb->append_insn (mem); - offset += s; - size -= s; - } -} - -/* Create a memset mask that is created by copying a CONSTANT byte value - to an integer of BYTE_SIZE bytes. */ - -static unsigned HOST_WIDE_INT -build_memset_value (unsigned HOST_WIDE_INT constant, unsigned byte_size) -{ - if (constant == 0) - return 0; - - HOST_WIDE_INT v = constant; - - for (unsigned i = 1; i < byte_size; i++) - v |= constant << (8 * i); - - return v; -} - -/* Generate memory set instructions that are going to be used - for setting a CONSTANT byte value to TARGET memory of SIZE bytes. - MIN_ALIGN is minimal alignment of provided HSA addresses. 
*/ - -static void -gen_hsa_memory_set (hsa_bb *hbb, hsa_op_address *target, - unsigned HOST_WIDE_INT constant, - unsigned size, BrigAlignment8_t min_align) -{ - hsa_op_address *addr; - hsa_insn_mem *mem; - - unsigned offset = 0; - unsigned min_byte_align = hsa_byte_alignment (min_align); - - while (size) - { - unsigned s; - if (size >= 8) - s = 8; - else if (size >= 4) - s = 4; - else if (size >= 2) - s = 2; - else - s = 1; - - if (s > min_byte_align) - s = min_byte_align; - - addr = new hsa_op_address (target->m_symbol, target->m_reg, - target->m_imm_offset + offset); - - BrigType16_t t = get_integer_type_by_bytes (s, false); - HOST_WIDE_INT c = build_memset_value (constant, s); - - mem = new hsa_insn_mem (BRIG_OPCODE_ST, t, new hsa_op_immed (c, t), - addr); - hbb->append_insn (mem); - offset += s; - size -= s; - } -} - -/* Generate HSAIL instructions for a single assignment - of an empty constructor to an ADDR_LHS. Constructor is passed as a - tree RHS and all instructions are appended to HBB. ALIGN is - alignment of the address. */ - -void -gen_hsa_ctor_assignment (hsa_op_address *addr_lhs, tree rhs, hsa_bb *hbb, - BrigAlignment8_t align) -{ - if (CONSTRUCTOR_NELTS (rhs)) - { - HSA_SORRY_AT (EXPR_LOCATION (rhs), - "support for HSA does not implement load from constructor"); - return; - } - - unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); - gen_hsa_memory_set (hbb, addr_lhs, 0, size, align); -} - -/* Generate HSA instructions for a single assignment of RHS to LHS. - HBB is the basic block they will be appended to. */ - -static void -gen_hsa_insns_for_single_assignment (tree lhs, tree rhs, hsa_bb *hbb) -{ - if (TREE_CODE (lhs) == SSA_NAME) - { - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - if (hsa_seen_error ()) - return; - - gen_hsa_insns_for_load (dest, rhs, TREE_TYPE (lhs), hbb); - } - else if (TREE_CODE (rhs) == SSA_NAME - || (is_gimple_min_invariant (rhs) && TREE_CODE (rhs) != STRING_CST)) - { - /* Store to memory. 
*/ - hsa_op_base *src = hsa_reg_or_immed_for_gimple_op (rhs, hbb); - if (hsa_seen_error ()) - return; - - gen_hsa_insns_for_store (lhs, src, hbb); - } - else - { - BrigAlignment8_t lhs_align; - hsa_op_address *addr_lhs = gen_hsa_addr_with_align (lhs, hbb, - &lhs_align); - - if (TREE_CODE (rhs) == CONSTRUCTOR) - gen_hsa_ctor_assignment (addr_lhs, rhs, hbb, lhs_align); - else - { - BrigAlignment8_t rhs_align; - hsa_op_address *addr_rhs = gen_hsa_addr_with_align (rhs, hbb, - &rhs_align); - - unsigned size = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (rhs))); - gen_hsa_memory_copy (hbb, addr_lhs, addr_rhs, size, - MIN (lhs_align, rhs_align)); - } - } -} - -/* Prepend before INSN a load from spill symbol of SPILL_REG. Return the - register into which we loaded. If this required another register to convert - from a B1 type, return it in *PTMP2, otherwise store NULL into it. We - assume we are out of SSA so the returned register does not have its - definition set. */ - -hsa_op_reg * -hsa_spill_in (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2) -{ - hsa_symbol *spill_sym = spill_reg->m_spill_sym; - hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type); - hsa_op_address *addr = new hsa_op_address (spill_sym); - - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, spill_sym->m_type, - reg, addr); - hsa_insert_insn_before (mem, insn); - - *ptmp2 = NULL; - if (spill_reg->m_type == BRIG_TYPE_B1) - { - hsa_insn_basic *cvtinsn; - *ptmp2 = reg; - reg = new hsa_op_reg (spill_reg->m_type); - - cvtinsn = new hsa_insn_cvt (reg, *ptmp2); - hsa_insert_insn_before (cvtinsn, insn); - } - return reg; -} - -/* Append after INSN a store to spill symbol of SPILL_REG. Return the register - from which we stored. If this required another register to convert to a B1 - type, return it in *PTMP2, otherwise store NULL into it. We assume we are - out of SSA so the returned register does not have its use updated. 
*/ - -hsa_op_reg * -hsa_spill_out (hsa_insn_basic *insn, hsa_op_reg *spill_reg, hsa_op_reg **ptmp2) -{ - hsa_symbol *spill_sym = spill_reg->m_spill_sym; - hsa_op_reg *reg = new hsa_op_reg (spill_sym->m_type); - hsa_op_address *addr = new hsa_op_address (spill_sym); - hsa_op_reg *returnreg; - - *ptmp2 = NULL; - returnreg = reg; - if (spill_reg->m_type == BRIG_TYPE_B1) - { - hsa_insn_basic *cvtinsn; - *ptmp2 = new hsa_op_reg (spill_sym->m_type); - reg->m_type = spill_reg->m_type; - - cvtinsn = new hsa_insn_cvt (*ptmp2, returnreg); - hsa_append_insn_after (cvtinsn, insn); - insn = cvtinsn; - reg = *ptmp2; - } - - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, spill_sym->m_type, reg, - addr); - hsa_append_insn_after (mem, insn); - return returnreg; -} - -/* Generate a comparison instruction that will compare LHS and RHS with - comparison specified by CODE and put result into register DEST. DEST has to - have its type set already but must not have its definition set yet. - Generated instructions will be added to HBB. 
*/ - -static void -gen_hsa_cmp_insn_from_gimple (enum tree_code code, tree lhs, tree rhs, - hsa_op_reg *dest, hsa_bb *hbb) -{ - BrigCompareOperation8_t compare; - - switch (code) - { - case LT_EXPR: - compare = BRIG_COMPARE_LT; - break; - case LE_EXPR: - compare = BRIG_COMPARE_LE; - break; - case GT_EXPR: - compare = BRIG_COMPARE_GT; - break; - case GE_EXPR: - compare = BRIG_COMPARE_GE; - break; - case EQ_EXPR: - compare = BRIG_COMPARE_EQ; - break; - case NE_EXPR: - compare = BRIG_COMPARE_NE; - break; - case UNORDERED_EXPR: - compare = BRIG_COMPARE_NAN; - break; - case ORDERED_EXPR: - compare = BRIG_COMPARE_NUM; - break; - case UNLT_EXPR: - compare = BRIG_COMPARE_LTU; - break; - case UNLE_EXPR: - compare = BRIG_COMPARE_LEU; - break; - case UNGT_EXPR: - compare = BRIG_COMPARE_GTU; - break; - case UNGE_EXPR: - compare = BRIG_COMPARE_GEU; - break; - case UNEQ_EXPR: - compare = BRIG_COMPARE_EQU; - break; - case LTGT_EXPR: - compare = BRIG_COMPARE_NEU; - break; - - default: - HSA_SORRY_ATV (EXPR_LOCATION (lhs), - "support for HSA does not implement comparison tree " - "code %s", get_tree_code_name (code)); - return; - } - - /* CMP instruction returns e.g. 0xffffffff (for a 32-bit with integer) - as a result of comparison. */ - - BrigType16_t dest_type = hsa_type_integer_p (dest->m_type) - ? (BrigType16_t) BRIG_TYPE_B1 : dest->m_type; - - hsa_insn_cmp *cmp = new hsa_insn_cmp (compare, dest_type); - hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (lhs, hbb); - cmp->set_op (1, op1->extend_int_to_32bit (hbb)); - hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs, hbb); - cmp->set_op (2, op2->extend_int_to_32bit (hbb)); - - hbb->append_insn (cmp); - cmp->set_output_in_type (dest, 0, hbb); -} - -/* Generate an unary instruction with OPCODE and append it to a basic block - HBB. The instruction uses DEST as a destination and OP1 - as a single operand. 
*/ - -static void -gen_hsa_unary_operation (BrigOpcode opcode, hsa_op_reg *dest, - hsa_op_with_type *op1, hsa_bb *hbb) -{ - gcc_checking_assert (dest); - hsa_insn_basic *insn; - - if (opcode == BRIG_OPCODE_MOV && hsa_needs_cvt (dest->m_type, op1->m_type)) - { - insn = new hsa_insn_cvt (dest, op1); - hbb->append_insn (insn); - return; - } - - op1 = op1->extend_int_to_32bit (hbb); - if (opcode == BRIG_OPCODE_FIRSTBIT || opcode == BRIG_OPCODE_LASTBIT) - { - BrigType16_t srctype = hsa_type_integer_p (op1->m_type) ? op1->m_type - : hsa_unsigned_type_for_type (op1->m_type); - insn = new hsa_insn_srctype (2, opcode, BRIG_TYPE_U32, srctype, NULL, - op1); - } - else - { - BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type); - insn = new hsa_insn_basic (2, opcode, optype, NULL, op1); - - if (opcode == BRIG_OPCODE_MOV) - hsa_fixup_mov_insn_type (insn); - else if (opcode == BRIG_OPCODE_ABS || opcode == BRIG_OPCODE_NEG) - { - /* ABS and NEG only exist in _s form :-/ */ - if (insn->m_type == BRIG_TYPE_U32) - insn->m_type = BRIG_TYPE_S32; - else if (insn->m_type == BRIG_TYPE_U64) - insn->m_type = BRIG_TYPE_S64; - } - } - - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); -} - -/* Generate a binary instruction with OPCODE and append it to a basic block - HBB. The instruction uses DEST as a destination and operands OP1 - and OP2. 
*/ - -static void -gen_hsa_binary_operation (int opcode, hsa_op_reg *dest, - hsa_op_with_type *op1, hsa_op_with_type *op2, - hsa_bb *hbb) -{ - gcc_checking_assert (dest); - - BrigType16_t optype = hsa_extend_inttype_to_32bit (dest->m_type); - op1 = op1->extend_int_to_32bit (hbb); - op2 = op2->extend_int_to_32bit (hbb); - - if ((opcode == BRIG_OPCODE_SHL || opcode == BRIG_OPCODE_SHR) - && is_a <hsa_op_immed *> (op2)) - { - hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2); - i->set_type (BRIG_TYPE_U32); - } - if ((opcode == BRIG_OPCODE_OR - || opcode == BRIG_OPCODE_XOR - || opcode == BRIG_OPCODE_AND) - && is_a <hsa_op_immed *> (op2)) - { - hsa_op_immed *i = dyn_cast <hsa_op_immed *> (op2); - i->set_type (hsa_unsigned_type_for_type (i->m_type)); - } - - hsa_insn_basic *insn = new hsa_insn_basic (3, opcode, optype, NULL, - op1, op2); - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); -} - -/* Generate HSA instructions for a single assignment. HBB is the basic block - they will be appended to. */ - -static void -gen_hsa_insns_for_operation_assignment (gimple *assign, hsa_bb *hbb) -{ - tree_code code = gimple_assign_rhs_code (assign); - gimple_rhs_class rhs_class = get_gimple_rhs_class (gimple_expr_code (assign)); - - tree lhs = gimple_assign_lhs (assign); - tree rhs1 = gimple_assign_rhs1 (assign); - tree rhs2 = gimple_assign_rhs2 (assign); - tree rhs3 = gimple_assign_rhs3 (assign); - - BrigOpcode opcode; - - switch (code) - { - CASE_CONVERT: - case FLOAT_EXPR: - /* The opcode is changed to BRIG_OPCODE_CVT if BRIG types - needs a conversion. 
*/ - opcode = BRIG_OPCODE_MOV; - break; - - case PLUS_EXPR: - case POINTER_PLUS_EXPR: - opcode = BRIG_OPCODE_ADD; - break; - case MINUS_EXPR: - opcode = BRIG_OPCODE_SUB; - break; - case MULT_EXPR: - opcode = BRIG_OPCODE_MUL; - break; - case MULT_HIGHPART_EXPR: - opcode = BRIG_OPCODE_MULHI; - break; - case RDIV_EXPR: - case TRUNC_DIV_EXPR: - case EXACT_DIV_EXPR: - opcode = BRIG_OPCODE_DIV; - break; - case CEIL_DIV_EXPR: - case FLOOR_DIV_EXPR: - case ROUND_DIV_EXPR: - HSA_SORRY_AT (gimple_location (assign), - "support for HSA does not implement %<CEIL_DIV_EXPR%>, " - "%<FLOOR_DIV_EXPR%> or %<ROUND_DIV_EXPR%>"); - return; - case TRUNC_MOD_EXPR: - opcode = BRIG_OPCODE_REM; - break; - case CEIL_MOD_EXPR: - case FLOOR_MOD_EXPR: - case ROUND_MOD_EXPR: - HSA_SORRY_AT (gimple_location (assign), - "support for HSA does not implement %<CEIL_MOD_EXPR%>, " - "%<FLOOR_MOD_EXPR%> or %<ROUND_MOD_EXPR%>"); - return; - case NEGATE_EXPR: - opcode = BRIG_OPCODE_NEG; - break; - case MIN_EXPR: - opcode = BRIG_OPCODE_MIN; - break; - case MAX_EXPR: - opcode = BRIG_OPCODE_MAX; - break; - case ABS_EXPR: - opcode = BRIG_OPCODE_ABS; - break; - case LSHIFT_EXPR: - opcode = BRIG_OPCODE_SHL; - break; - case RSHIFT_EXPR: - opcode = BRIG_OPCODE_SHR; - break; - case LROTATE_EXPR: - case RROTATE_EXPR: - { - hsa_insn_basic *insn = NULL; - int code1 = code == LROTATE_EXPR ? BRIG_OPCODE_SHL : BRIG_OPCODE_SHR; - int code2 = code != LROTATE_EXPR ? 
BRIG_OPCODE_SHL : BRIG_OPCODE_SHR; - BrigType16_t btype = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), - true); - - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - hsa_op_reg *op1 = new hsa_op_reg (btype); - hsa_op_reg *op2 = new hsa_op_reg (btype); - hsa_op_with_type *shift1 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); - - tree type = TREE_TYPE (rhs2); - unsigned HOST_WIDE_INT bitsize = TREE_INT_CST_LOW (TYPE_SIZE (type)); - - hsa_op_with_type *shift2 = NULL; - if (TREE_CODE (rhs2) == INTEGER_CST) - shift2 = new hsa_op_immed (bitsize - tree_to_uhwi (rhs2), - BRIG_TYPE_U32); - else if (TREE_CODE (rhs2) == SSA_NAME) - { - hsa_op_reg *s = hsa_cfun->reg_for_gimple_ssa (rhs2); - s = as_a <hsa_op_reg *> (s->extend_int_to_32bit (hbb)); - hsa_op_reg *d = new hsa_op_reg (s->m_type); - hsa_op_immed *size_imm = new hsa_op_immed (bitsize, BRIG_TYPE_U32); - - insn = new hsa_insn_basic (3, BRIG_OPCODE_SUB, d->m_type, - d, s, size_imm); - hbb->append_insn (insn); - - shift2 = d; - } - else - gcc_unreachable (); - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - gen_hsa_binary_operation (code1, op1, src, shift1, hbb); - gen_hsa_binary_operation (code2, op2, src, shift2, hbb); - gen_hsa_binary_operation (BRIG_OPCODE_OR, dest, op1, op2, hbb); - - return; - } - case BIT_IOR_EXPR: - opcode = BRIG_OPCODE_OR; - break; - case BIT_XOR_EXPR: - opcode = BRIG_OPCODE_XOR; - break; - case BIT_AND_EXPR: - opcode = BRIG_OPCODE_AND; - break; - case BIT_NOT_EXPR: - opcode = BRIG_OPCODE_NOT; - break; - case FIX_TRUNC_EXPR: - { - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *v = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - - if (hsa_needs_cvt (dest->m_type, v->m_type)) - { - hsa_op_reg *tmp = new hsa_op_reg (v->m_type); - - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC, - tmp->m_type, tmp, v); - hbb->append_insn (insn); - - hsa_insn_basic *cvtinsn = new hsa_insn_cvt (dest, tmp); - hbb->append_insn (cvtinsn); - } - 
else - { - hsa_insn_basic *insn = new hsa_insn_basic (2, BRIG_OPCODE_TRUNC, - dest->m_type, dest, v); - hbb->append_insn (insn); - } - - return; - } - opcode = BRIG_OPCODE_TRUNC; - break; - - case LT_EXPR: - case LE_EXPR: - case GT_EXPR: - case GE_EXPR: - case EQ_EXPR: - case NE_EXPR: - case UNORDERED_EXPR: - case ORDERED_EXPR: - case UNLT_EXPR: - case UNLE_EXPR: - case UNGT_EXPR: - case UNGE_EXPR: - case UNEQ_EXPR: - case LTGT_EXPR: - { - hsa_op_reg *dest - = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign)); - - gen_hsa_cmp_insn_from_gimple (code, rhs1, rhs2, dest, hbb); - return; - } - case COND_EXPR: - { - hsa_op_reg *dest - = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign)); - hsa_op_with_type *ctrl = NULL; - tree cond = rhs1; - - if (CONSTANT_CLASS_P (cond) || TREE_CODE (cond) == SSA_NAME) - ctrl = hsa_reg_or_immed_for_gimple_op (cond, hbb); - else - { - hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1); - - gen_hsa_cmp_insn_from_gimple (TREE_CODE (cond), - TREE_OPERAND (cond, 0), - TREE_OPERAND (cond, 1), - r, hbb); - - ctrl = r; - } - - hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); - hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb); - op2 = op2->extend_int_to_32bit (hbb); - op3 = op3->extend_int_to_32bit (hbb); - - BrigType16_t type = hsa_extend_inttype_to_32bit (dest->m_type); - BrigType16_t utype = hsa_unsigned_type_for_type (type); - if (is_a <hsa_op_immed *> (op2)) - op2->m_type = utype; - if (is_a <hsa_op_immed *> (op3)) - op3->m_type = utype; - - hsa_insn_basic *insn - = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, - hsa_bittype_for_type (type), - NULL, ctrl, op2, op3); - - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); - return; - } - case COMPLEX_EXPR: - { - hsa_op_reg *dest - = hsa_cfun->reg_for_gimple_ssa (gimple_assign_lhs (assign)); - hsa_op_with_type *rhs1_reg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - rhs1_reg = rhs1_reg->extend_int_to_32bit (hbb); - hsa_op_with_type 
*rhs2_reg = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); - rhs2_reg = rhs2_reg->extend_int_to_32bit (hbb); - - if (hsa_seen_error ()) - return; - - BrigType16_t src_type = hsa_bittype_for_type (rhs1_reg->m_type); - rhs1_reg = rhs1_reg->get_in_type (src_type, hbb); - rhs2_reg = rhs2_reg->get_in_type (src_type, hbb); - - hsa_insn_packed *insn - = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dest->m_type, src_type, - dest, rhs1_reg, rhs2_reg); - hbb->append_insn (insn); - - return; - } - default: - /* Implement others as we come across them. */ - HSA_SORRY_ATV (gimple_location (assign), - "support for HSA does not implement operation %s", - get_tree_code_name (code)); - return; - } - - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - hsa_op_with_type *op2 - = rhs2 ? hsa_reg_or_immed_for_gimple_op (rhs2, hbb) : NULL; - - if (hsa_seen_error ()) - return; - - switch (rhs_class) - { - case GIMPLE_TERNARY_RHS: - { - hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb); - op3 = op3->extend_int_to_32bit (hbb); - hsa_insn_basic *insn = new hsa_insn_basic (4, opcode, dest->m_type, dest, - op1, op2, op3); - hbb->append_insn (insn); - } - return; - - case GIMPLE_BINARY_RHS: - gen_hsa_binary_operation (opcode, dest, op1, op2, hbb); - break; - - case GIMPLE_UNARY_RHS: - gen_hsa_unary_operation (opcode, dest, op1, hbb); - break; - default: - gcc_unreachable (); - } -} - -/* Generate HSA instructions for a given gimple condition statement COND. - Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of COND. 
*/ - -static void -gen_hsa_insns_for_cond_stmt (gimple *cond, hsa_bb *hbb) -{ - hsa_op_reg *ctrl = new hsa_op_reg (BRIG_TYPE_B1); - hsa_insn_cbr *cbr; - - gen_hsa_cmp_insn_from_gimple (gimple_cond_code (cond), - gimple_cond_lhs (cond), - gimple_cond_rhs (cond), - ctrl, hbb); - - cbr = new hsa_insn_cbr (ctrl); - hbb->append_insn (cbr); -} - -/* Maximum number of elements in a jump table for an HSA SBR instruction. */ - -#define HSA_MAXIMUM_SBR_LABELS 16 - -/* Return lowest value of a switch S that is handled in a non-default - label. */ - -static tree -get_switch_low (gswitch *s) -{ - unsigned labels = gimple_switch_num_labels (s); - gcc_checking_assert (labels >= 1); - - return CASE_LOW (gimple_switch_label (s, 1)); -} - -/* Return highest value of a switch S that is handled in a non-default - label. */ - -static tree -get_switch_high (gswitch *s) -{ - unsigned labels = gimple_switch_num_labels (s); - - /* Compare last label to maximum number of labels. */ - tree label = gimple_switch_label (s, labels - 1); - tree low = CASE_LOW (label); - tree high = CASE_HIGH (label); - - return high != NULL_TREE ? high : low; -} - -static tree -get_switch_size (gswitch *s) -{ - return int_const_binop (MINUS_EXPR, get_switch_high (s), get_switch_low (s)); -} - -/* Generate HSA instructions for a given gimple switch. - Instructions will be appended to HBB. */ - -static void -gen_hsa_insns_for_switch_stmt (gswitch *s, hsa_bb *hbb) -{ - gimple_stmt_iterator it = gsi_for_stmt (s); - gsi_prev (&it); - - /* Create preambule that verifies that index - lowest_label >= 0. 
*/ - edge e = split_block (hbb->m_bb, gsi_stmt (it)); - e->flags &= ~EDGE_FALLTHRU; - e->flags |= EDGE_TRUE_VALUE; - - tree index_tree = gimple_switch_index (s); - tree lowest = get_switch_low (s); - tree highest = get_switch_high (s); - - hsa_op_reg *index = hsa_cfun->reg_for_gimple_ssa (index_tree); - index = as_a <hsa_op_reg *> (index->extend_int_to_32bit (hbb)); - - hsa_op_reg *cmp1_reg = new hsa_op_reg (BRIG_TYPE_B1); - hsa_op_immed *cmp1_immed = new hsa_op_immed (lowest, true); - hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_GE, cmp1_reg->m_type, - cmp1_reg, index, cmp1_immed)); - - hsa_op_reg *cmp2_reg = new hsa_op_reg (BRIG_TYPE_B1); - hsa_op_immed *cmp2_immed = new hsa_op_immed (highest, true); - hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_LE, cmp2_reg->m_type, - cmp2_reg, index, cmp2_immed)); - - hsa_op_reg *cmp_reg = new hsa_op_reg (BRIG_TYPE_B1); - hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_AND, cmp_reg->m_type, - cmp_reg, cmp1_reg, cmp2_reg)); - - hbb->append_insn (new hsa_insn_cbr (cmp_reg)); - - basic_block default_label_bb = gimple_switch_default_bb (cfun, s); - - if (!gimple_seq_empty_p (phi_nodes (default_label_bb))) - { - default_label_bb = split_edge (find_edge (e->dest, default_label_bb)); - hsa_init_new_bb (default_label_bb); - } - - make_edge (e->src, default_label_bb, EDGE_FALSE_VALUE); - - hsa_cfun->m_modified_cfg = true; - - /* Basic block with the SBR instruction. 
*/ - hbb = hsa_init_new_bb (e->dest); - - hsa_op_reg *sub_index = new hsa_op_reg (index->m_type); - hbb->append_insn (new hsa_insn_basic (3, BRIG_OPCODE_SUB, sub_index->m_type, - sub_index, index, - new hsa_op_immed (lowest, true))); - - hsa_op_base *tmp = sub_index->get_in_type (BRIG_TYPE_U64, hbb); - sub_index = as_a <hsa_op_reg *> (tmp); - unsigned labels = gimple_switch_num_labels (s); - unsigned HOST_WIDE_INT size = tree_to_uhwi (get_switch_size (s)); - - hsa_insn_sbr *sbr = new hsa_insn_sbr (sub_index, size + 1); - - /* Prepare array with default label destination. */ - for (unsigned HOST_WIDE_INT i = 0; i <= size; i++) - sbr->m_jump_table.safe_push (default_label_bb); - - /* Iterate all labels and fill up the jump table. */ - for (unsigned i = 1; i < labels; i++) - { - tree label = gimple_switch_label (s, i); - basic_block bb = label_to_block (cfun, CASE_LABEL (label)); - - unsigned HOST_WIDE_INT sub_low - = tree_to_uhwi (int_const_binop (MINUS_EXPR, CASE_LOW (label), lowest)); - - unsigned HOST_WIDE_INT sub_high = sub_low; - tree high = CASE_HIGH (label); - if (high != NULL) - sub_high = tree_to_uhwi (int_const_binop (MINUS_EXPR, high, lowest)); - - for (unsigned HOST_WIDE_INT j = sub_low; j <= sub_high; j++) - sbr->m_jump_table[j] = bb; - } - - hbb->append_insn (sbr); -} - -/* Verify that the function DECL can be handled by HSA. */ - -static void -verify_function_arguments (tree decl) -{ - tree type = TREE_TYPE (decl); - if (DECL_STATIC_CHAIN (decl)) - { - HSA_SORRY_ATV (EXPR_LOCATION (decl), - "HSA does not support nested functions: %qD", decl); - return; - } - else if (!TYPE_ARG_TYPES (type) || stdarg_p (type)) - { - HSA_SORRY_ATV (EXPR_LOCATION (decl), - "HSA does not support functions with variadic arguments " - "(or unknown return type): %qD", decl); - return; - } -} - -/* Return BRIG type for FORMAL_ARG_TYPE. If the formal argument type is NULL, - return ACTUAL_ARG_TYPE. 
*/ - -static BrigType16_t -get_format_argument_type (tree formal_arg_type, BrigType16_t actual_arg_type) -{ - if (formal_arg_type == NULL) - return actual_arg_type; - - BrigType16_t decl_type - = hsa_type_for_scalar_tree_type (formal_arg_type, false); - return mem_type_for_type (decl_type); -} - -/* Generate HSA instructions for a direct call instruction. - Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of STMT. - If ASSIGN_LHS is false, do not copy HSA function result argument into the - corresponding HSA representation of the gimple statement LHS. */ - -static void -gen_hsa_insns_for_direct_call (gimple *stmt, hsa_bb *hbb, - bool assign_lhs = true) -{ - tree decl = gimple_call_fndecl (stmt); - verify_function_arguments (decl); - if (hsa_seen_error ()) - return; - - hsa_insn_call *call_insn = new hsa_insn_call (decl); - hsa_cfun->m_called_functions.safe_push (call_insn->m_called_function); - - /* Argument block start. */ - hsa_insn_arg_block *arg_start - = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn); - hbb->append_insn (arg_start); - - tree parm_type_chain = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); - - /* Preparation of arguments that will be passed to function. */ - const unsigned args = gimple_call_num_args (stmt); - for (unsigned i = 0; i < args; ++i) - { - tree parm = gimple_call_arg (stmt, (int)i); - tree parm_decl_type = parm_type_chain != NULL_TREE - ? 
TREE_VALUE (parm_type_chain) : NULL_TREE; - hsa_op_address *addr; - - if (AGGREGATE_TYPE_P (TREE_TYPE (parm))) - { - addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i); - BrigAlignment8_t align; - hsa_op_address *src = gen_hsa_addr_with_align (parm, hbb, &align); - gen_hsa_memory_copy (hbb, addr, src, - addr->m_symbol->total_byte_size (), align); - } - else - { - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb); - - if (parm_decl_type != NULL && AGGREGATE_TYPE_P (parm_decl_type)) - { - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement an aggregate " - "formal argument in a function call, while actual " - "argument is not an aggregate"); - return; - } - - BrigType16_t formal_arg_type - = get_format_argument_type (parm_decl_type, src->m_type); - if (hsa_seen_error ()) - return; - - if (src->m_type != formal_arg_type) - src = src->get_in_type (formal_arg_type, hbb); - - addr - = gen_hsa_addr_for_arg (parm_decl_type != NULL_TREE ? - parm_decl_type: TREE_TYPE (parm), i); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, formal_arg_type, - src, addr); - - hbb->append_insn (mem); - } - - call_insn->m_input_args.safe_push (addr->m_symbol); - if (parm_type_chain) - parm_type_chain = TREE_CHAIN (parm_type_chain); - } - - call_insn->m_args_code_list = new hsa_op_code_list (args); - hbb->append_insn (call_insn); - - tree result_type = TREE_TYPE (TREE_TYPE (decl)); - - tree result = gimple_call_lhs (stmt); - hsa_insn_mem *result_insn = NULL; - if (!VOID_TYPE_P (result_type)) - { - hsa_op_address *addr = gen_hsa_addr_for_arg (result_type, -1); - - /* Even if result of a function call is unused, we have to emit - declaration for the result. 
*/ - if (result && assign_lhs) - { - tree lhs_type = TREE_TYPE (result); - - if (hsa_seen_error ()) - return; - - if (AGGREGATE_TYPE_P (lhs_type)) - { - BrigAlignment8_t align; - hsa_op_address *result_addr - = gen_hsa_addr_with_align (result, hbb, &align); - gen_hsa_memory_copy (hbb, result_addr, addr, - addr->m_symbol->total_byte_size (), align); - } - else - { - BrigType16_t mtype - = mem_type_for_type (hsa_type_for_scalar_tree_type (lhs_type, - false)); - - hsa_op_reg *dst = hsa_cfun->reg_for_gimple_ssa (result); - result_insn = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, dst, addr); - hbb->append_insn (result_insn); - } - } - - call_insn->m_output_arg = addr->m_symbol; - call_insn->m_result_code_list = new hsa_op_code_list (1); - } - else - { - if (result) - { - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement an assignment of " - "return value from a void function"); - return; - } - - call_insn->m_result_code_list = new hsa_op_code_list (0); - } - - /* Argument block end. */ - hsa_insn_arg_block *arg_end - = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); - hbb->append_insn (arg_end); -} - -/* Generate HSA instructions for a direct call of an internal fn. - Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of STMT. 
*/ - -static void -gen_hsa_insns_for_call_of_internal_fn (gimple *stmt, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - - tree lhs_type = TREE_TYPE (lhs); - tree rhs1 = gimple_call_arg (stmt, 0); - tree rhs1_type = TREE_TYPE (rhs1); - enum internal_fn fn = gimple_call_internal_fn (stmt); - hsa_internal_fn *ifn - = new hsa_internal_fn (fn, tree_to_uhwi (TYPE_SIZE (rhs1_type))); - hsa_insn_call *call_insn = new hsa_insn_call (ifn); - - gcc_checking_assert (FLOAT_TYPE_P (rhs1_type)); - - if (!hsa_emitted_internal_decls->find (call_insn->m_called_internal_fn)) - hsa_cfun->m_called_internal_fns.safe_push (call_insn->m_called_internal_fn); - - hsa_insn_arg_block *arg_start - = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, call_insn); - hbb->append_insn (arg_start); - - unsigned num_args = gimple_call_num_args (stmt); - - /* Function arguments. */ - for (unsigned i = 0; i < num_args; i++) - { - tree parm = gimple_call_arg (stmt, (int)i); - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (parm, hbb); - - hsa_op_address *addr = gen_hsa_addr_for_arg (TREE_TYPE (parm), i); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, src->m_type, - src, addr); - - call_insn->m_input_args.safe_push (addr->m_symbol); - hbb->append_insn (mem); - } - - call_insn->m_args_code_list = new hsa_op_code_list (num_args); - hbb->append_insn (call_insn); - - /* Assign returned value. */ - hsa_op_address *addr = gen_hsa_addr_for_arg (lhs_type, -1); - - call_insn->m_output_arg = addr->m_symbol; - call_insn->m_result_code_list = new hsa_op_code_list (1); - - /* Argument block end. */ - hsa_insn_arg_block *arg_end - = new hsa_insn_arg_block (BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, call_insn); - hbb->append_insn (arg_end); -} - -/* Generate HSA instructions for a return value instruction. - Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of STMT. 
*/ - -static void -gen_hsa_insns_for_return (greturn *stmt, hsa_bb *hbb) -{ - tree retval = gimple_return_retval (stmt); - if (retval) - { - hsa_op_address *addr = new hsa_op_address (hsa_cfun->m_output_arg); - - if (AGGREGATE_TYPE_P (TREE_TYPE (retval))) - { - BrigAlignment8_t align; - hsa_op_address *retval_addr = gen_hsa_addr_with_align (retval, hbb, - &align); - gen_hsa_memory_copy (hbb, addr, retval_addr, - hsa_cfun->m_output_arg->total_byte_size (), - align); - } - else - { - BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (retval), - false); - BrigType16_t mtype = mem_type_for_type (t); - - /* Store of return value. */ - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (retval, hbb); - src = src->get_in_type (mtype, hbb); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, mtype, src, - addr); - hbb->append_insn (mem); - } - } - - /* HSAIL return instruction emission. */ - hsa_insn_basic *ret = new hsa_insn_basic (0, BRIG_OPCODE_RET); - hbb->append_insn (ret); -} - -/* Set OP_INDEX-th operand of the instruction to DEST, as the DEST - can have a different type, conversion instructions are possibly - appended to HBB. */ - -void -hsa_insn_basic::set_output_in_type (hsa_op_reg *dest, unsigned op_index, - hsa_bb *hbb) -{ - gcc_checking_assert (op_output_p (op_index)); - - if (dest->m_type == m_type) - { - set_op (op_index, dest); - return; - } - - hsa_insn_basic *insn; - hsa_op_reg *tmp; - if (hsa_needs_cvt (dest->m_type, m_type)) - { - tmp = new hsa_op_reg (m_type); - insn = new hsa_insn_cvt (dest, tmp); - } - else if (hsa_type_bit_size (dest->m_type) == hsa_type_bit_size (m_type)) - { - /* When output, HSA registers do not really have types, only sizes, so if - the sizes match, we can use the register directly. 
*/ - set_op (op_index, dest); - return; - } - else - { - tmp = new hsa_op_reg (m_type); - insn = new hsa_insn_basic (2, BRIG_OPCODE_MOV, dest->m_type, - dest, tmp->get_in_type (dest->m_type, hbb)); - hsa_fixup_mov_insn_type (insn); - } - set_op (op_index, tmp); - hbb->append_insn (insn); -} - -/* Generate instruction OPCODE to query a property of HSA grid along the - given DIMENSION. Store result into DEST and append the instruction to - HBB. */ - -static void -query_hsa_grid_dim (hsa_op_reg *dest, int opcode, hsa_op_immed *dimension, - hsa_bb *hbb) -{ - hsa_insn_basic *insn = new hsa_insn_basic (2, opcode, BRIG_TYPE_U32, NULL, - dimension); - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); -} - -/* Generate instruction OPCODE to query a property of HSA grid along the given - dimension which is an immediate in first argument of STMT. Store result - into the register corresponding to LHS of STMT and append the instruction to - HBB. */ - -static void -query_hsa_grid_dim (gimple *stmt, int opcode, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt)); - if (lhs == NULL_TREE) - return; - - tree arg = gimple_call_arg (stmt, 0); - unsigned HOST_WIDE_INT dim = 5; - if (tree_fits_uhwi_p (arg)) - dim = tree_to_uhwi (arg); - if (dim > 2) - { - HSA_SORRY_AT (gimple_location (stmt), - "HSA grid query dimension must be immediate constant 0, 1 " - "or 2"); - return; - } - - hsa_op_immed *hdim = new hsa_op_immed (dim, (BrigKind16_t) BRIG_TYPE_U32); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - query_hsa_grid_dim (dest, opcode, hdim, hbb); -} - -/* Generate instruction OPCODE to query a property of HSA grid that is - independent of any dimension. Store result into the register corresponding - to LHS of STMT and append the instruction to HBB. 
*/ - -static void -query_hsa_grid_nodim (gimple *stmt, BrigOpcode16_t opcode, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (dyn_cast <gcall *> (stmt)); - if (lhs == NULL_TREE) - return; - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - BrigType16_t brig_type = hsa_unsigned_type_for_type (dest->m_type); - hsa_insn_basic *insn = new hsa_insn_basic (1, opcode, brig_type, dest); - hbb->append_insn (insn); -} - -/* Emit instructions that set hsa_num_threads according to provided VALUE. - Instructions are appended to basic block HBB. */ - -static void -gen_set_num_threads (tree value, hsa_bb *hbb) -{ - hbb->append_insn (new hsa_insn_comment ("omp_set_num_threads")); - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (value, hbb); - - src = src->get_in_type (hsa_num_threads->m_type, hbb); - hsa_op_address *addr = new hsa_op_address (hsa_num_threads); - - hsa_insn_basic *basic - = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, src, addr); - hbb->append_insn (basic); -} - -/* Return byte offset of a FIELD_NAME in GOMP_hsa_kernel_dispatch which - is defined in plugin-hsa.c. */ - -static HOST_WIDE_INT -get_hsa_kernel_dispatch_offset (const char *field_name) -{ - tree *hsa_kernel_dispatch_type = hsa_get_kernel_dispatch_type (); - if (*hsa_kernel_dispatch_type == NULL) - { - /* Collection of information needed for a dispatch of a kernel from a - kernel. Keep in sync with libgomp's plugin-hsa.c. 
*/ - - *hsa_kernel_dispatch_type = make_node (RECORD_TYPE); - tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("queue"), ptr_type_node); - DECL_CHAIN (id_f1) = NULL_TREE; - tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("omp_data_memory"), - ptr_type_node); - DECL_CHAIN (id_f2) = id_f1; - tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernarg_address"), - ptr_type_node); - DECL_CHAIN (id_f3) = id_f2; - tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("object"), - uint64_type_node); - DECL_CHAIN (id_f4) = id_f3; - tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("signal"), - uint64_type_node); - DECL_CHAIN (id_f5) = id_f4; - tree id_f6 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("private_segment_size"), - uint32_type_node); - DECL_CHAIN (id_f6) = id_f5; - tree id_f7 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("group_segment_size"), - uint32_type_node); - DECL_CHAIN (id_f7) = id_f6; - tree id_f8 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_dispatch_count"), - uint64_type_node); - DECL_CHAIN (id_f8) = id_f7; - tree id_f9 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("debug"), - uint64_type_node); - DECL_CHAIN (id_f9) = id_f8; - tree id_f10 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("omp_level"), - uint64_type_node); - DECL_CHAIN (id_f10) = id_f9; - tree id_f11 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("children_dispatches"), - ptr_type_node); - DECL_CHAIN (id_f11) = id_f10; - tree id_f12 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("omp_num_threads"), - uint32_type_node); - DECL_CHAIN (id_f12) = id_f11; - - - finish_builtin_struct (*hsa_kernel_dispatch_type, "__hsa_kernel_dispatch", - id_f12, NULL_TREE); - TYPE_ARTIFICIAL (*hsa_kernel_dispatch_type) = 1; - } - - for (tree chain = TYPE_FIELDS 
(*hsa_kernel_dispatch_type); - chain != NULL_TREE; chain = TREE_CHAIN (chain)) - if (id_equal (DECL_NAME (chain), field_name)) - return int_byte_position (chain); - - gcc_unreachable (); -} - -/* Return an HSA register that will contain number of threads for - a future dispatched kernel. Instructions are added to HBB. */ - -static hsa_op_reg * -gen_num_threads_for_dispatch (hsa_bb *hbb) -{ - /* Step 1) Assign to number of threads: - MIN (HSA_DEFAULT_NUM_THREADS, hsa_num_threads). */ - hsa_op_reg *threads = new hsa_op_reg (hsa_num_threads->m_type); - hsa_op_address *addr = new hsa_op_address (hsa_num_threads); - - hbb->append_insn (new hsa_insn_mem (BRIG_OPCODE_LD, threads->m_type, - threads, addr)); - - hsa_op_immed *limit = new hsa_op_immed (HSA_DEFAULT_NUM_THREADS, - BRIG_TYPE_U32); - hsa_op_reg *r = new hsa_op_reg (BRIG_TYPE_B1); - hsa_insn_cmp * cmp - = new hsa_insn_cmp (BRIG_COMPARE_LT, r->m_type, r, threads, limit); - hbb->append_insn (cmp); - - BrigType16_t btype = hsa_bittype_for_type (threads->m_type); - hsa_op_reg *tmp = new hsa_op_reg (threads->m_type); - - hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp, r, - threads, limit)); - - /* Step 2) If the number is equal to zero, - return shadow->omp_num_threads. 
*/ - hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg (); - - hsa_op_reg *shadow_thread_count = new hsa_op_reg (BRIG_TYPE_U32); - addr - = new hsa_op_address (shadow_reg_ptr, - get_hsa_kernel_dispatch_offset ("omp_num_threads")); - hsa_insn_basic *basic - = new hsa_insn_mem (BRIG_OPCODE_LD, shadow_thread_count->m_type, - shadow_thread_count, addr); - hbb->append_insn (basic); - - hsa_op_reg *tmp2 = new hsa_op_reg (threads->m_type); - r = new hsa_op_reg (BRIG_TYPE_B1); - hsa_op_immed *imm = new hsa_op_immed (0, shadow_thread_count->m_type); - hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_EQ, r->m_type, r, tmp, imm)); - hbb->append_insn (new hsa_insn_basic (4, BRIG_OPCODE_CMOV, btype, tmp2, r, - shadow_thread_count, tmp)); - - hsa_op_base *dest = tmp2->get_in_type (BRIG_TYPE_U16, hbb); - - return as_a <hsa_op_reg *> (dest); -} - -/* Build OPCODE query for all three hsa dimensions, multiply them and store the - result into DEST. */ - -static void -multiply_grid_dim_characteristics (hsa_op_reg *dest, int opcode, hsa_bb *hbb) -{ - hsa_op_reg *dimx = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (dimx, opcode, - new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb); - hsa_op_reg *dimy = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (dimy, opcode, - new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb); - hsa_op_reg *dimz = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (dimz, opcode, - new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb); - hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, - dimx->get_in_type (dest->m_type, hbb), - dimy->get_in_type (dest->m_type, hbb), hbb); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, dest, tmp, - dimz->get_in_type (dest->m_type, hbb), hbb); -} - -/* Emit instructions that assign number of threads to lhs of gimple STMT. - Instructions are appended to basic block HBB. 
*/ - -static void -gen_get_num_threads (gimple *stmt, hsa_bb *hbb) -{ - if (gimple_call_lhs (stmt) == NULL_TREE) - return; - - hbb->append_insn (new hsa_insn_comment ("omp_get_num_threads")); - tree lhs = gimple_call_lhs (stmt); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - multiply_grid_dim_characteristics (dest, BRIG_OPCODE_CURRENTWORKGROUPSIZE, - hbb); -} - -/* Emit instructions that assign number of teams to lhs of gimple STMT. - Instructions are appended to basic block HBB. */ - -static void -gen_get_num_teams (gimple *stmt, hsa_bb *hbb) -{ - if (gimple_call_lhs (stmt) == NULL_TREE) - return; - - hbb->append_insn (new hsa_insn_comment ("omp_get_num_teams")); - tree lhs = gimple_call_lhs (stmt); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - multiply_grid_dim_characteristics (dest, BRIG_OPCODE_GRIDGROUPS, hbb); -} - -/* Emit instructions that assign a team number to lhs of gimple STMT. - Instructions are appended to basic block HBB. */ - -static void -gen_get_team_num (gimple *stmt, hsa_bb *hbb) -{ - if (gimple_call_lhs (stmt) == NULL_TREE) - return; - - hbb->append_insn (new hsa_insn_comment ("omp_get_team_num")); - tree lhs = gimple_call_lhs (stmt); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - - hsa_op_reg *gnum_x = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (gnum_x, BRIG_OPCODE_GRIDGROUPS, - new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb); - hsa_op_reg *gnum_y = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (gnum_y, BRIG_OPCODE_GRIDGROUPS, - new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb); - - hsa_op_reg *gno_z = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (gno_z, BRIG_OPCODE_WORKGROUPID, - new hsa_op_immed (2, (BrigKind16_t) BRIG_TYPE_U32), hbb); - - hsa_op_reg *tmp1 = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp1, - gnum_x->get_in_type (dest->m_type, hbb), - gnum_y->get_in_type (dest->m_type, hbb), hbb); - hsa_op_reg *tmp2 = new hsa_op_reg 
(dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp2, tmp1, - gno_z->get_in_type (dest->m_type, hbb), hbb); - - hsa_op_reg *gno_y = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (gno_y, BRIG_OPCODE_WORKGROUPID, - new hsa_op_immed (1, (BrigKind16_t) BRIG_TYPE_U32), hbb); - hsa_op_reg *tmp3 = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp3, - gnum_x->get_in_type (dest->m_type, hbb), - gno_y->get_in_type (dest->m_type, hbb), hbb); - hsa_op_reg *tmp4 = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_ADD, tmp4, tmp3, tmp2, hbb); - hsa_op_reg *gno_x = new hsa_op_reg (BRIG_TYPE_U32); - query_hsa_grid_dim (gno_x, BRIG_OPCODE_WORKGROUPID, - new hsa_op_immed (0, (BrigKind16_t) BRIG_TYPE_U32), hbb); - gen_hsa_binary_operation (BRIG_OPCODE_ADD, dest, tmp4, - gno_x->get_in_type (dest->m_type, hbb), hbb); -} - -/* Emit instructions that get levels-var ICV to lhs of gimple STMT. - Instructions are appended to basic block HBB. */ - -static void -gen_get_level (gimple *stmt, hsa_bb *hbb) -{ - if (gimple_call_lhs (stmt) == NULL_TREE) - return; - - hbb->append_insn (new hsa_insn_comment ("omp_get_level")); - - tree lhs = gimple_call_lhs (stmt); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - - hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg (); - if (shadow_reg_ptr == NULL) - { - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement %<omp_get_level%> " - "called from a function not being inlined within a kernel"); - return; - } - - hsa_op_address *addr - = new hsa_op_address (shadow_reg_ptr, - get_hsa_kernel_dispatch_offset ("omp_level")); - - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, - (hsa_op_base *) NULL, addr); - hbb->append_insn (mem); - mem->set_output_in_type (dest, 0, hbb); -} - -/* Emit instruction that implement omp_get_max_threads of gimple STMT. 
*/ - -static void -gen_get_max_threads (gimple *stmt, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - - hbb->append_insn (new hsa_insn_comment ("omp_get_max_threads")); - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *num_theads_reg = gen_num_threads_for_dispatch (hbb) - ->get_in_type (dest->m_type, hbb); - hsa_build_append_simple_mov (dest, num_theads_reg, hbb); -} - -/* Emit instructions that implement alloca builtin gimple STMT. - Instructions are appended to basic block HBB. */ - -static void -gen_hsa_alloca (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - tree fndecl = gimple_call_fndecl (call); - built_in_function fn = DECL_FUNCTION_CODE (fndecl); - - gcc_checking_assert (ALLOCA_FUNCTION_CODE_P (fn)); - - unsigned bit_alignment = 0; - - if (fn != BUILT_IN_ALLOCA) - { - tree alignment_tree = gimple_call_arg (call, 1); - if (TREE_CODE (alignment_tree) != INTEGER_CST) - { - HSA_SORRY_ATV (gimple_location (call), - "support for HSA does not implement " - "%qD with a non-constant alignment %E", - fndecl, alignment_tree); - } - - bit_alignment = tree_to_uhwi (alignment_tree); - } - - tree rhs1 = gimple_call_arg (call, 0); - hsa_op_with_type *size = hsa_reg_or_immed_for_gimple_op (rhs1, hbb) - ->get_in_type (BRIG_TYPE_U32, hbb); - hsa_op_with_type *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - - hsa_op_reg *tmp - = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_PRIVATE)); - hsa_insn_alloca *a = new hsa_insn_alloca (tmp, size, bit_alignment); - hbb->append_insn (a); - - hsa_insn_seg *seg - = new hsa_insn_seg (BRIG_OPCODE_STOF, - hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT), - tmp->m_type, BRIG_SEGMENT_PRIVATE, dest, tmp); - hbb->append_insn (seg); -} - -/* Emit instructions that implement clrsb builtin STMT: - Returns the number of leading redundant sign bits in x, i.e. 
the number - of bits following the most significant bit that are identical to it. - There are no special cases for 0 or other values. - Instructions are appended to basic block HBB. */ - -static void -gen_hsa_clrsb (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - tree rhs1 = gimple_call_arg (call, 0); - hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - arg->extend_int_to_32bit (hbb); - BrigType16_t bittype = hsa_bittype_for_type (arg->m_type); - unsigned bitsize = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs1))); - - /* FIRSTBIT instruction is defined just for 32 and 64-bits wide integers. */ - gcc_checking_assert (bitsize == 32 || bitsize == 64); - - /* Set true to MOST_SIG if the most significant bit is set to one. */ - hsa_op_immed *c = new hsa_op_immed (1ul << (bitsize - 1), - hsa_uint_for_bitsize (bitsize)); - - hsa_op_reg *and_reg = new hsa_op_reg (bittype); - gen_hsa_binary_operation (BRIG_OPCODE_AND, and_reg, arg, c, hbb); - - hsa_op_reg *most_sign = new hsa_op_reg (BRIG_TYPE_B1); - hsa_insn_cmp *cmp - = new hsa_insn_cmp (BRIG_COMPARE_EQ, most_sign->m_type, most_sign, - and_reg, c); - hbb->append_insn (cmp); - - /* If the most significant bit is one, negate the input. Otherwise - shift the input value to left by one bit. */ - hsa_op_reg *arg_neg = new hsa_op_reg (arg->m_type); - gen_hsa_unary_operation (BRIG_OPCODE_NEG, arg_neg, arg, hbb); - - hsa_op_reg *shifted_arg = new hsa_op_reg (arg->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_SHL, shifted_arg, arg, - new hsa_op_immed (1, BRIG_TYPE_U64), hbb); - - /* Assign the value that can be used for FIRSTBIT instruction according - to the most significant bit. 
*/ - hsa_op_reg *tmp = new hsa_op_reg (bittype); - hsa_insn_basic *cmov - = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, bittype, tmp, most_sign, - arg_neg, shifted_arg); - hbb->append_insn (cmov); - - hsa_op_reg *leading_bits = new hsa_op_reg (BRIG_TYPE_S32); - gen_hsa_unary_operation (BRIG_OPCODE_FIRSTBIT, leading_bits, - tmp->get_in_type (hsa_uint_for_bitsize (bitsize), - hbb), hbb); - - /* Set flag if the input value is equal to zero. */ - hsa_op_reg *is_zero = new hsa_op_reg (BRIG_TYPE_B1); - cmp = new hsa_insn_cmp (BRIG_COMPARE_EQ, is_zero->m_type, is_zero, arg, - new hsa_op_immed (0, arg->m_type)); - hbb->append_insn (cmp); - - /* Return the number of leading bits, - or (bitsize - 1) if the input value is zero. */ - cmov = new hsa_insn_basic (4, BRIG_OPCODE_CMOV, BRIG_TYPE_B32, NULL, is_zero, - new hsa_op_immed (bitsize - 1, BRIG_TYPE_U32), - leading_bits->get_in_type (BRIG_TYPE_B32, hbb)); - hbb->append_insn (cmov); - cmov->set_output_in_type (dest, 0, hbb); -} - -/* Emit instructions that implement ffs builtin STMT: - Returns one plus the index of the least significant 1-bit of x, - or if x is zero, returns zero. - Instructions are appended to basic block HBB. 
*/ - -static void -gen_hsa_ffs (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - - tree rhs1 = gimple_call_arg (call, 0); - hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - arg = arg->extend_int_to_32bit (hbb); - - hsa_op_reg *tmp = new hsa_op_reg (BRIG_TYPE_U32); - hsa_insn_srctype *insn = new hsa_insn_srctype (2, BRIG_OPCODE_LASTBIT, - tmp->m_type, arg->m_type, - tmp, arg); - hbb->append_insn (insn); - - hsa_insn_basic *addition - = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, NULL, tmp, - new hsa_op_immed (1, tmp->m_type)); - hbb->append_insn (addition); - addition->set_output_in_type (dest, 0, hbb); -} - -static void -gen_hsa_popcount_to_dest (hsa_op_reg *dest, hsa_op_with_type *arg, hsa_bb *hbb) -{ - gcc_checking_assert (hsa_type_integer_p (arg->m_type)); - - if (hsa_type_bit_size (arg->m_type) < 32) - arg = arg->get_in_type (BRIG_TYPE_B32, hbb); - - BrigType16_t srctype = hsa_bittype_for_type (arg->m_type); - if (!hsa_btype_p (arg->m_type)) - arg = arg->get_in_type (srctype, hbb); - - hsa_insn_srctype *popcount - = new hsa_insn_srctype (2, BRIG_OPCODE_POPCOUNT, BRIG_TYPE_U32, - srctype, NULL, arg); - hbb->append_insn (popcount); - popcount->set_output_in_type (dest, 0, hbb); -} - -/* Emit instructions that implement parity builtin STMT: - Returns the parity of x, i.e. the number of 1-bits in x modulo 2. - Instructions are appended to basic block HBB. 
*/ - -static void -gen_hsa_parity (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - tree rhs1 = gimple_call_arg (call, 0); - hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - - hsa_op_reg *popcount = new hsa_op_reg (BRIG_TYPE_U32); - gen_hsa_popcount_to_dest (popcount, arg, hbb); - - hsa_insn_basic *insn - = new hsa_insn_basic (3, BRIG_OPCODE_REM, popcount->m_type, NULL, popcount, - new hsa_op_immed (2, popcount->m_type)); - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); -} - -/* Emit instructions that implement popcount builtin STMT. - Instructions are appended to basic block HBB. */ - -static void -gen_hsa_popcount (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - tree rhs1 = gimple_call_arg (call, 0); - hsa_op_with_type *arg = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - - gen_hsa_popcount_to_dest (dest, arg, hbb); -} - -/* Emit instructions that implement DIVMOD builtin STMT. - Instructions are appended to basic block HBB. 
*/ - -static void -gen_hsa_divmod (gcall *call, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - tree rhs0 = gimple_call_arg (call, 0); - tree rhs1 = gimple_call_arg (call, 1); - - hsa_op_with_type *arg0 = hsa_reg_or_immed_for_gimple_op (rhs0, hbb); - arg0 = arg0->extend_int_to_32bit (hbb); - hsa_op_with_type *arg1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - arg1 = arg1->extend_int_to_32bit (hbb); - - hsa_op_reg *dest0 = new hsa_op_reg (arg0->m_type); - hsa_op_reg *dest1 = new hsa_op_reg (arg1->m_type); - - hsa_insn_basic *insn = new hsa_insn_basic (3, BRIG_OPCODE_DIV, dest0->m_type, - dest0, arg0, arg1); - hbb->append_insn (insn); - insn = new hsa_insn_basic (3, BRIG_OPCODE_REM, dest1->m_type, dest1, arg0, - arg1); - hbb->append_insn (insn); - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - BrigType16_t dst_type = hsa_extend_inttype_to_32bit (dest->m_type); - BrigType16_t src_type = hsa_bittype_for_type (dest0->m_type); - - insn = new hsa_insn_packed (3, BRIG_OPCODE_COMBINE, dst_type, - src_type, NULL, dest0, dest1); - hbb->append_insn (insn); - insn->set_output_in_type (dest, 0, hbb); -} - -/* Emit instructions that implement FMA, FMS, FNMA or FNMS call STMT. - Instructions are appended to basic block HBB. NEGATE1 is true for - FNMA and FNMS. NEGATE3 is true for FMS and FNMS. 
*/ - -static void -gen_hsa_fma (gcall *call, hsa_bb *hbb, bool negate1, bool negate3) -{ - tree lhs = gimple_call_lhs (call); - if (lhs == NULL_TREE) - return; - - tree rhs1 = gimple_call_arg (call, 0); - tree rhs2 = gimple_call_arg (call, 1); - tree rhs3 = gimple_call_arg (call, 2); - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *op1 = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - hsa_op_with_type *op2 = hsa_reg_or_immed_for_gimple_op (rhs2, hbb); - hsa_op_with_type *op3 = hsa_reg_or_immed_for_gimple_op (rhs3, hbb); - - if (negate1) - { - hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); - gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op1, hbb); - op1 = tmp; - } - - /* There is a native HSA instruction for scalar FMAs but not for vector - ones. */ - if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE) - { - hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); - gen_hsa_binary_operation (BRIG_OPCODE_MUL, tmp, op1, op2, hbb); - gen_hsa_binary_operation (negate3 ? BRIG_OPCODE_SUB : BRIG_OPCODE_ADD, - dest, tmp, op3, hbb); - } - else - { - if (negate3) - { - hsa_op_reg *tmp = new hsa_op_reg (dest->m_type); - gen_hsa_unary_operation (BRIG_OPCODE_NEG, tmp, op3, hbb); - op3 = tmp; - } - hsa_insn_basic *insn = new hsa_insn_basic (4, BRIG_OPCODE_MAD, - dest->m_type, dest, - op1, op2, op3); - hbb->append_insn (insn); - } -} - -/* Set VALUE to a shadow kernel debug argument and append a new instruction - to HBB basic block. 
*/ - -static void -set_debug_value (hsa_bb *hbb, hsa_op_with_type *value) -{ - hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg (); - if (shadow_reg_ptr == NULL) - return; - - hsa_op_address *addr - = new hsa_op_address (shadow_reg_ptr, - get_hsa_kernel_dispatch_offset ("debug")); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_ST, BRIG_TYPE_U64, value, - addr); - hbb->append_insn (mem); -} - -void -omp_simple_builtin::generate (gimple *stmt, hsa_bb *hbb) -{ - if (m_sorry) - { - if (m_warning_message) - HSA_SORRY_AT (gimple_location (stmt), m_warning_message); - else - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not implement calls to %qs", - m_name); - } - else if (m_warning_message != NULL) - warning_at (gimple_location (stmt), OPT_Whsa, m_warning_message); - - if (m_return_value != NULL) - { - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - - hbb->append_insn (new hsa_insn_comment (m_name)); - - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *op = m_return_value->get_in_type (dest->m_type, hbb); - hsa_build_append_simple_mov (dest, op, hbb); - } -} - -/* If STMT is a call of a known library function, generate code to perform - it and return true. */ - -static bool -gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb) -{ - bool handled = false; - const char *name = hsa_get_declaration_name (gimple_call_fndecl (stmt)); - - char *copy = NULL; - size_t len = strlen (name); - if (len > 0 && name[len - 1] == '_') - { - copy = XNEWVEC (char, len + 1); - strcpy (copy, name); - copy[len - 1] = '\0'; - name = copy; - } - - /* Handle omp_* routines. 
*/ - if (strstr (name, "omp_") == name) - { - hsa_init_simple_builtins (); - omp_simple_builtin *builtin = omp_simple_builtins->get (name); - if (builtin) - { - builtin->generate (stmt, hbb); - return true; - } - - handled = true; - if (strcmp (name, "omp_set_num_threads") == 0) - gen_set_num_threads (gimple_call_arg (stmt, 0), hbb); - else if (strcmp (name, "omp_get_thread_num") == 0) - { - hbb->append_insn (new hsa_insn_comment (name)); - query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb); - } - else if (strcmp (name, "omp_get_num_threads") == 0) - { - hbb->append_insn (new hsa_insn_comment (name)); - gen_get_num_threads (stmt, hbb); - } - else if (strcmp (name, "omp_get_num_teams") == 0) - gen_get_num_teams (stmt, hbb); - else if (strcmp (name, "omp_get_team_num") == 0) - gen_get_team_num (stmt, hbb); - else if (strcmp (name, "omp_get_level") == 0) - gen_get_level (stmt, hbb); - else if (strcmp (name, "omp_get_active_level") == 0) - gen_get_level (stmt, hbb); - else if (strcmp (name, "omp_in_parallel") == 0) - gen_get_level (stmt, hbb); - else if (strcmp (name, "omp_get_max_threads") == 0) - gen_get_max_threads (stmt, hbb); - else - handled = false; - - if (handled) - { - if (copy) - free (copy); - return true; - } - } - - if (strcmp (name, "__hsa_set_debug_value") == 0) - { - handled = true; - if (hsa_cfun->has_shadow_reg_p ()) - { - tree rhs1 = gimple_call_arg (stmt, 0); - hsa_op_with_type *src = hsa_reg_or_immed_for_gimple_op (rhs1, hbb); - - src = src->get_in_type (BRIG_TYPE_U64, hbb); - set_debug_value (hbb, src); - } - } - - if (copy) - free (copy); - return handled; -} - -/* Helper functions to create a single unary HSA operations out of calls to - builtins. OPCODE is the HSA operation to be generated. STMT is a gimple - call to a builtin. HBB is the HSA BB to which the instruction should be - added. Note that nothing will be created if STMT does not have a LHS. 
*/ - -static void -gen_hsa_unaryop_for_builtin (BrigOpcode opcode, gimple *stmt, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); - hsa_op_with_type *op - = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb); - gen_hsa_unary_operation (opcode, dest, op, hbb); -} - -/* Helper functions to create a call to standard library if LHS of the - STMT is used. HBB is the HSA BB to which the instruction should be - added. */ - -static void -gen_hsa_unaryop_builtin_call (gimple *stmt, hsa_bb *hbb) -{ - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - - if (gimple_call_internal_p (stmt)) - gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); - else - gen_hsa_insns_for_direct_call (stmt, hbb); -} - -/* Helper functions to create a single unary HSA operations out of calls to - builtins (if unsafe math optimizations are enable). Otherwise, create - a call to standard library function. - OPCODE is the HSA operation to be generated. STMT is a gimple - call to a builtin. HBB is the HSA BB to which the instruction should be - added. Note that nothing will be created if STMT does not have a LHS. */ - -static void -gen_hsa_unaryop_or_call_for_builtin (BrigOpcode opcode, gimple *stmt, - hsa_bb *hbb) -{ - if (flag_unsafe_math_optimizations) - gen_hsa_unaryop_for_builtin (opcode, stmt, hbb); - else - gen_hsa_unaryop_builtin_call (stmt, hbb); -} - -/* Generate HSA address corresponding to a value VAL (as opposed to a memory - reference tree), for example an SSA_NAME or an ADDR_EXPR. HBB is the HSA BB - to which the instruction should be added. 
*/ - -static hsa_op_address * -get_address_from_value (tree val, hsa_bb *hbb) -{ - switch (TREE_CODE (val)) - { - case SSA_NAME: - { - BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - hsa_op_base *reg - = hsa_cfun->reg_for_gimple_ssa (val)->get_in_type (addrtype, hbb); - return new hsa_op_address (NULL, as_a <hsa_op_reg *> (reg), 0); - } - case ADDR_EXPR: - return gen_hsa_addr (TREE_OPERAND (val, 0), hbb); - - case INTEGER_CST: - if (tree_fits_shwi_p (val)) - return new hsa_op_address (NULL, NULL, tree_to_shwi (val)); - /* fall-through */ - - default: - HSA_SORRY_ATV (EXPR_LOCATION (val), - "support for HSA does not implement memory access to %E", - val); - return new hsa_op_address (NULL, NULL, 0); - } -} - -/* Expand assignment of a result of a string BUILTIN to DST. - Size of the operation is N bytes, where instructions - will be append to HBB. */ - -static void -expand_lhs_of_string_op (gimple *stmt, - unsigned HOST_WIDE_INT n, hsa_bb *hbb, - enum built_in_function builtin) -{ - /* If LHS is expected, we need to emit a PHI instruction. 
*/ - tree lhs = gimple_call_lhs (stmt); - if (!lhs) - return; - - hsa_op_reg *lhs_reg = hsa_cfun->reg_for_gimple_ssa (lhs); - - hsa_op_with_type *dst_reg - = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb); - hsa_op_with_type *tmp; - - switch (builtin) - { - case BUILT_IN_MEMPCPY: - { - tmp = new hsa_op_reg (dst_reg->m_type); - hsa_insn_basic *add - = new hsa_insn_basic (3, BRIG_OPCODE_ADD, tmp->m_type, - tmp, dst_reg, - new hsa_op_immed (n, dst_reg->m_type)); - hbb->append_insn (add); - break; - } - case BUILT_IN_MEMCPY: - case BUILT_IN_MEMSET: - tmp = dst_reg; - break; - default: - gcc_unreachable (); - } - - hbb->append_insn (new hsa_insn_basic (2, BRIG_OPCODE_MOV, lhs_reg->m_type, - lhs_reg, tmp)); -} - -#define HSA_MEMORY_BUILTINS_LIMIT 128 - -/* Expand a string builtin (from a gimple STMT) in a way that - according to MISALIGNED_FLAG we process either direct emission - (a bunch of memory load and store instructions), or we emit a function call - of a library function (for instance 'memcpy'). Actually, a basic block - for direct emission is just prepared, where caller is responsible - for emission of corresponding instructions. - All instruction are appended to HBB. */ - -hsa_bb * -expand_string_operation_builtin (gimple *stmt, hsa_bb *hbb, - hsa_op_reg *misaligned_flag) -{ - edge e = split_block (hbb->m_bb, stmt); - basic_block condition_bb = e->src; - hbb->append_insn (new hsa_insn_cbr (misaligned_flag)); - - /* Prepare the control flow. 
*/ - edge condition_edge = EDGE_SUCC (condition_bb, 0); - basic_block call_bb = split_edge (condition_edge); - - basic_block expanded_bb = split_edge (EDGE_SUCC (call_bb, 0)); - basic_block cont_bb = EDGE_SUCC (expanded_bb, 0)->dest; - basic_block merge_bb = split_edge (EDGE_PRED (cont_bb, 0)); - - condition_edge->flags &= ~EDGE_FALLTHRU; - condition_edge->flags |= EDGE_TRUE_VALUE; - make_edge (condition_bb, expanded_bb, EDGE_FALSE_VALUE); - - redirect_edge_succ (EDGE_SUCC (call_bb, 0), merge_bb); - - hsa_cfun->m_modified_cfg = true; - - hsa_init_new_bb (expanded_bb); - - /* Slow path: function call. */ - gen_hsa_insns_for_direct_call (stmt, hsa_init_new_bb (call_bb), false); - - return hsa_bb_for_bb (expanded_bb); -} - -/* Expand a memory copy BUILTIN (BUILT_IN_MEMCPY, BUILT_IN_MEMPCPY) from - a gimple STMT and store all necessary instruction to HBB basic block. */ - -static void -expand_memory_copy (gimple *stmt, hsa_bb *hbb, enum built_in_function builtin) -{ - tree byte_size = gimple_call_arg (stmt, 2); - - if (!tree_fits_uhwi_p (byte_size)) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); - - if (n > HSA_MEMORY_BUILTINS_LIMIT) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - tree dst = gimple_call_arg (stmt, 0); - tree src = gimple_call_arg (stmt, 1); - - hsa_op_address *dst_addr = get_address_from_value (dst, hbb); - hsa_op_address *src_addr = get_address_from_value (src, hbb); - - /* As gen_hsa_memory_copy relies on memory alignment - greater or equal to 8 bytes, we need to verify the alignment. */ - BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - hsa_op_reg *src_addr_reg = new hsa_op_reg (addrtype); - hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype); - - convert_addr_to_flat_segment (src_addr, src_addr_reg, hbb); - convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb); - - /* Process BIT OR for source and destination addresses. 
*/ - hsa_op_reg *or_reg = new hsa_op_reg (addrtype); - gen_hsa_binary_operation (BRIG_OPCODE_OR, or_reg, src_addr_reg, - dst_addr_reg, hbb); - - /* Process BIT AND with 0x7 to identify the desired alignment - of 8 bytes. */ - hsa_op_reg *masked = new hsa_op_reg (addrtype); - - gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, or_reg, - new hsa_op_immed (7, addrtype), hbb); - - hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1); - hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type, - misaligned, masked, - new hsa_op_immed (0, masked->m_type))); - - hsa_bb *native_impl_bb - = expand_string_operation_builtin (stmt, hbb, misaligned); - - gen_hsa_memory_copy (native_impl_bb, dst_addr, src_addr, n, BRIG_ALIGNMENT_8); - hsa_bb *merge_bb - = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest); - expand_lhs_of_string_op (stmt, n, merge_bb, builtin); -} - - -/* Expand a memory set BUILTIN (BUILT_IN_MEMSET, BUILT_IN_BZERO) from - a gimple STMT and store all necessary instruction to HBB basic block. - The operation set N bytes with a CONSTANT value. */ - -static void -expand_memory_set (gimple *stmt, unsigned HOST_WIDE_INT n, - unsigned HOST_WIDE_INT constant, hsa_bb *hbb, - enum built_in_function builtin) -{ - tree dst = gimple_call_arg (stmt, 0); - hsa_op_address *dst_addr = get_address_from_value (dst, hbb); - - /* As gen_hsa_memory_set relies on memory alignment - greater or equal to 8 bytes, we need to verify the alignment. */ - BrigType16_t addrtype = hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT); - hsa_op_reg *dst_addr_reg = new hsa_op_reg (addrtype); - convert_addr_to_flat_segment (dst_addr, dst_addr_reg, hbb); - - /* Process BIT AND with 0x7 to identify the desired alignment - of 8 bytes. 
*/ - hsa_op_reg *masked = new hsa_op_reg (addrtype); - - gen_hsa_binary_operation (BRIG_OPCODE_AND, masked, dst_addr_reg, - new hsa_op_immed (7, addrtype), hbb); - - hsa_op_reg *misaligned = new hsa_op_reg (BRIG_TYPE_B1); - hbb->append_insn (new hsa_insn_cmp (BRIG_COMPARE_NE, misaligned->m_type, - misaligned, masked, - new hsa_op_immed (0, masked->m_type))); - - hsa_bb *native_impl_bb - = expand_string_operation_builtin (stmt, hbb, misaligned); - - gen_hsa_memory_set (native_impl_bb, dst_addr, constant, n, BRIG_ALIGNMENT_8); - hsa_bb *merge_bb - = hsa_init_new_bb (EDGE_SUCC (native_impl_bb->m_bb, 0)->dest); - expand_lhs_of_string_op (stmt, n, merge_bb, builtin); -} - -/* Store into MEMORDER the memory order specified by tree T, which must be an - integer constant representing a C++ memory order. If it isn't, issue an HSA - sorry message using LOC and return true, otherwise return false and store - the name of the requested order to *MNAME. */ - -static bool -hsa_memorder_from_tree (tree t, BrigMemoryOrder *memorder, const char **mname, - location_t loc) -{ - if (!tree_fits_uhwi_p (t)) - { - HSA_SORRY_ATV (loc, "support for HSA does not implement memory model %E", - t); - return true; - } - - unsigned HOST_WIDE_INT mm = tree_to_uhwi (t); - switch (mm & MEMMODEL_BASE_MASK) - { - case MEMMODEL_RELAXED: - *memorder = BRIG_MEMORY_ORDER_RELAXED; - *mname = "relaxed"; - break; - case MEMMODEL_CONSUME: - /* HSA does not have an equivalent, but we can use the slightly stronger - ACQUIRE. 
*/ - *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE; - *mname = "consume"; - break; - case MEMMODEL_ACQUIRE: - *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE; - *mname = "acquire"; - break; - case MEMMODEL_RELEASE: - *memorder = BRIG_MEMORY_ORDER_SC_RELEASE; - *mname = "release"; - break; - case MEMMODEL_ACQ_REL: - *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE; - *mname = "acq_rel"; - break; - case MEMMODEL_SEQ_CST: - /* Callers implementing a simple load or store need to remove the release - or acquire part respectively. */ - *memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE; - *mname = "seq_cst"; - break; - default: - { - HSA_SORRY_AT (loc, "support for HSA does not implement the specified " - "memory model"); - return true; - } - } - return false; -} - -/* Helper function to create an HSA atomic operation instruction out of calls - to atomic builtins. RET_ORIG is true if the built-in is the variant that - return s the value before applying operation, and false if it should return - the value after applying the operation (if it returns value at all). ACODE - is the atomic operation code, STMT is a gimple call to a builtin. HBB is - the HSA BB to which the instruction should be added. If SIGNAL is true, the - created operation will work on HSA signals rather than atomic variables. */ - -static void -gen_hsa_atomic_for_builtin (bool ret_orig, enum BrigAtomicOperation acode, - gimple *stmt, hsa_bb *hbb, bool signal) -{ - tree lhs = gimple_call_lhs (stmt); - - tree type = TREE_TYPE (gimple_call_arg (stmt, 1)); - BrigType16_t hsa_type = hsa_type_for_scalar_tree_type (type, false); - BrigType16_t mtype = mem_type_for_type (hsa_type); - BrigMemoryOrder memorder; - const char *mmname; - - if (hsa_memorder_from_tree (gimple_call_arg (stmt, 2), &memorder, &mmname, - gimple_location (stmt))) - return; - - /* Certain atomic insns must have Bx memory types. 
*/ - switch (acode) - { - case BRIG_ATOMIC_LD: - case BRIG_ATOMIC_ST: - case BRIG_ATOMIC_AND: - case BRIG_ATOMIC_OR: - case BRIG_ATOMIC_XOR: - case BRIG_ATOMIC_EXCH: - mtype = hsa_bittype_for_type (mtype); - break; - default: - break; - } - - hsa_op_reg *dest; - int nops, opcode; - if (lhs) - { - if (ret_orig) - dest = hsa_cfun->reg_for_gimple_ssa (lhs); - else - dest = new hsa_op_reg (hsa_type); - opcode = signal ? BRIG_OPCODE_SIGNAL : BRIG_OPCODE_ATOMIC; - nops = 3; - } - else - { - dest = NULL; - opcode = signal ? BRIG_OPCODE_SIGNALNORET : BRIG_OPCODE_ATOMICNORET; - nops = 2; - } - - if (acode == BRIG_ATOMIC_ST) - { - if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE) - memorder = BRIG_MEMORY_ORDER_SC_RELEASE; - - if (memorder != BRIG_MEMORY_ORDER_RELAXED - && memorder != BRIG_MEMORY_ORDER_SC_RELEASE - && memorder != BRIG_MEMORY_ORDER_NONE) - { - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not implement memory model for " - "%<ATOMIC_ST%>: %s", mmname); - return; - } - } - - hsa_insn_basic *atominsn; - hsa_op_base *tgt; - if (signal) - { - atominsn = new hsa_insn_signal (nops, opcode, acode, mtype, memorder); - tgt = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 0), hbb); - } - else - { - atominsn = new hsa_insn_atomic (nops, opcode, acode, mtype, memorder); - hsa_op_address *addr; - addr = get_address_from_value (gimple_call_arg (stmt, 0), hbb); - if (addr->m_symbol && addr->m_symbol->m_segment == BRIG_SEGMENT_PRIVATE) - { - HSA_SORRY_AT (gimple_location (stmt), - "HSA does not implement atomic operations in private " - "segment"); - return; - } - tgt = addr; - } - - hsa_op_with_type *op - = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb); - if (lhs) - { - atominsn->set_op (0, dest); - atominsn->set_op (1, tgt); - atominsn->set_op (2, op); - } - else - { - atominsn->set_op (0, tgt); - atominsn->set_op (1, op); - } - - hbb->append_insn (atominsn); - - /* HSA does not natively support the variants that return the 
modified value, - so re-do the operation again non-atomically if that is what was - requested. */ - if (lhs && !ret_orig) - { - int arith; - switch (acode) - { - case BRIG_ATOMIC_ADD: - arith = BRIG_OPCODE_ADD; - break; - case BRIG_ATOMIC_AND: - arith = BRIG_OPCODE_AND; - break; - case BRIG_ATOMIC_OR: - arith = BRIG_OPCODE_OR; - break; - case BRIG_ATOMIC_SUB: - arith = BRIG_OPCODE_SUB; - break; - case BRIG_ATOMIC_XOR: - arith = BRIG_OPCODE_XOR; - break; - default: - gcc_unreachable (); - } - hsa_op_reg *real_dest = hsa_cfun->reg_for_gimple_ssa (lhs); - gen_hsa_binary_operation (arith, real_dest, dest, op, hbb); - } -} - -/* Generate HSA instructions for an internal fn. - Instructions will be appended to HBB, which also needs to be the - corresponding structure to the basic_block of STMT. */ - -static void -gen_hsa_insn_for_internal_fn_call (gcall *stmt, hsa_bb *hbb) -{ - gcc_checking_assert (gimple_call_internal_fn (stmt)); - internal_fn fn = gimple_call_internal_fn (stmt); - - bool is_float_type_p = false; - if (gimple_call_lhs (stmt) != NULL - && TREE_TYPE (gimple_call_lhs (stmt)) == float_type_node) - is_float_type_p = true; - - switch (fn) - { - case IFN_CEIL: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb); - break; - - case IFN_FLOOR: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb); - break; - - case IFN_RINT: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb); - break; - - case IFN_SQRT: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb); - break; - - case IFN_RSQRT: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_NRSQRT, stmt, hbb); - break; - - case IFN_TRUNC: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb); - break; - - case IFN_COS: - { - if (is_float_type_p) - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb); - else - gen_hsa_unaryop_builtin_call (stmt, hbb); - - break; - } - case IFN_EXP2: - { - if (is_float_type_p) - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb); 
- else - gen_hsa_unaryop_builtin_call (stmt, hbb); - - break; - } - - case IFN_LOG2: - { - if (is_float_type_p) - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb); - else - gen_hsa_unaryop_builtin_call (stmt, hbb); - - break; - } - - case IFN_SIN: - { - if (is_float_type_p) - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb); - else - gen_hsa_unaryop_builtin_call (stmt, hbb); - break; - } - - case IFN_CLRSB: - gen_hsa_clrsb (stmt, hbb); - break; - - case IFN_CLZ: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb); - break; - - case IFN_CTZ: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb); - break; - - case IFN_FFS: - gen_hsa_ffs (stmt, hbb); - break; - - case IFN_PARITY: - gen_hsa_parity (stmt, hbb); - break; - - case IFN_POPCOUNT: - gen_hsa_popcount (stmt, hbb); - break; - - case IFN_DIVMOD: - gen_hsa_divmod (stmt, hbb); - break; - - case IFN_ACOS: - case IFN_ASIN: - case IFN_ATAN: - case IFN_EXP: - case IFN_EXP10: - case IFN_EXPM1: - case IFN_LOG: - case IFN_LOG10: - case IFN_LOG1P: - case IFN_LOGB: - case IFN_SIGNIFICAND: - case IFN_TAN: - case IFN_NEARBYINT: - case IFN_ROUND: - case IFN_ATAN2: - case IFN_COPYSIGN: - case IFN_FMOD: - case IFN_POW: - case IFN_REMAINDER: - case IFN_SCALB: - case IFN_FMIN: - case IFN_FMAX: - gen_hsa_insns_for_call_of_internal_fn (stmt, hbb); - break; - - case IFN_FMA: - gen_hsa_fma (stmt, hbb, false, false); - break; - - case IFN_FMS: - gen_hsa_fma (stmt, hbb, false, true); - break; - - case IFN_FNMA: - gen_hsa_fma (stmt, hbb, true, false); - break; - - case IFN_FNMS: - gen_hsa_fma (stmt, hbb, true, true); - break; - - default: - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not implement internal function: %s", - internal_fn_name (fn)); - break; - } -} - -/* Generate HSA instructions for the given call statement STMT. Instructions - will be appended to HBB. 
*/ - -static void -gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb) -{ - gcall *call = as_a <gcall *> (stmt); - tree lhs = gimple_call_lhs (stmt); - hsa_op_reg *dest; - - if (gimple_call_internal_p (stmt)) - { - gen_hsa_insn_for_internal_fn_call (call, hbb); - return; - } - - if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) - { - tree function_decl = gimple_call_fndecl (stmt); - - if (function_decl == NULL_TREE) - { - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement indirect calls"); - return; - } - - /* Prefetch pass can create type-mismatching prefetch builtin calls which - fail the gimple_call_builtin_p test above. Handle them here. */ - if (fndecl_built_in_p (function_decl, BUILT_IN_PREFETCH)) - return; - - if (hsa_callable_function_p (function_decl)) - gen_hsa_insns_for_direct_call (stmt, hbb); - else if (!gen_hsa_insns_for_known_library_call (stmt, hbb)) - HSA_SORRY_AT (gimple_location (stmt), - "HSA supports only calls of functions marked with " - "%<#pragma omp declare target%>"); - return; - } - - tree fndecl = gimple_call_fndecl (stmt); - enum built_in_function builtin = DECL_FUNCTION_CODE (fndecl); - switch (builtin) - { - case BUILT_IN_FABS: - case BUILT_IN_FABSF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_ABS, stmt, hbb); - break; - - case BUILT_IN_CEIL: - case BUILT_IN_CEILF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_CEIL, stmt, hbb); - break; - - case BUILT_IN_FLOOR: - case BUILT_IN_FLOORF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FLOOR, stmt, hbb); - break; - - case BUILT_IN_RINT: - case BUILT_IN_RINTF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_RINT, stmt, hbb); - break; - - case BUILT_IN_SQRT: - case BUILT_IN_SQRTF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_SQRT, stmt, hbb); - break; - - case BUILT_IN_TRUNC: - case BUILT_IN_TRUNCF: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_TRUNC, stmt, hbb); - break; - - case BUILT_IN_COS: - case BUILT_IN_SIN: - case BUILT_IN_EXP2: - case BUILT_IN_LOG2: - /* HSAIL does not 
provide an instruction for double argument type. */ - gen_hsa_unaryop_builtin_call (stmt, hbb); - break; - - case BUILT_IN_COSF: - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NCOS, stmt, hbb); - break; - - case BUILT_IN_EXP2F: - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NEXP2, stmt, hbb); - break; - - case BUILT_IN_LOG2F: - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NLOG2, stmt, hbb); - break; - - case BUILT_IN_SINF: - gen_hsa_unaryop_or_call_for_builtin (BRIG_OPCODE_NSIN, stmt, hbb); - break; - - case BUILT_IN_CLRSB: - case BUILT_IN_CLRSBL: - case BUILT_IN_CLRSBLL: - gen_hsa_clrsb (call, hbb); - break; - - case BUILT_IN_CLZ: - case BUILT_IN_CLZL: - case BUILT_IN_CLZLL: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_FIRSTBIT, stmt, hbb); - break; - - case BUILT_IN_CTZ: - case BUILT_IN_CTZL: - case BUILT_IN_CTZLL: - gen_hsa_unaryop_for_builtin (BRIG_OPCODE_LASTBIT, stmt, hbb); - break; - - case BUILT_IN_FFS: - case BUILT_IN_FFSL: - case BUILT_IN_FFSLL: - gen_hsa_ffs (call, hbb); - break; - - case BUILT_IN_PARITY: - case BUILT_IN_PARITYL: - case BUILT_IN_PARITYLL: - gen_hsa_parity (call, hbb); - break; - - case BUILT_IN_POPCOUNT: - case BUILT_IN_POPCOUNTL: - case BUILT_IN_POPCOUNTLL: - gen_hsa_popcount (call, hbb); - break; - - case BUILT_IN_ATOMIC_LOAD_1: - case BUILT_IN_ATOMIC_LOAD_2: - case BUILT_IN_ATOMIC_LOAD_4: - case BUILT_IN_ATOMIC_LOAD_8: - case BUILT_IN_ATOMIC_LOAD_16: - { - BrigType16_t mtype; - hsa_op_base *src; - src = get_address_from_value (gimple_call_arg (stmt, 0), hbb); - - BrigMemoryOrder memorder; - const char *mmname; - if (hsa_memorder_from_tree (gimple_call_arg (stmt, 1), &memorder, - &mmname, gimple_location (stmt))) - return; - - if (memorder == BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE) - memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE; - - if (memorder != BRIG_MEMORY_ORDER_RELAXED - && memorder != BRIG_MEMORY_ORDER_SC_ACQUIRE - && memorder != BRIG_MEMORY_ORDER_NONE) - { - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not 
implement " - "memory model for atomic loads: %s", mmname); - return; - } - - if (lhs) - { - BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (lhs), - false); - mtype = mem_type_for_type (t); - mtype = hsa_bittype_for_type (mtype); - dest = hsa_cfun->reg_for_gimple_ssa (lhs); - } - else - { - mtype = BRIG_TYPE_B64; - dest = new hsa_op_reg (mtype); - } - - hsa_insn_basic *atominsn; - atominsn = new hsa_insn_atomic (2, BRIG_OPCODE_ATOMIC, BRIG_ATOMIC_LD, - mtype, memorder, dest, src); - - hbb->append_insn (atominsn); - break; - } - - case BUILT_IN_ATOMIC_EXCHANGE_1: - case BUILT_IN_ATOMIC_EXCHANGE_2: - case BUILT_IN_ATOMIC_EXCHANGE_4: - case BUILT_IN_ATOMIC_EXCHANGE_8: - case BUILT_IN_ATOMIC_EXCHANGE_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_EXCH, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_FETCH_ADD_1: - case BUILT_IN_ATOMIC_FETCH_ADD_2: - case BUILT_IN_ATOMIC_FETCH_ADD_4: - case BUILT_IN_ATOMIC_FETCH_ADD_8: - case BUILT_IN_ATOMIC_FETCH_ADD_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ADD, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_FETCH_SUB_1: - case BUILT_IN_ATOMIC_FETCH_SUB_2: - case BUILT_IN_ATOMIC_FETCH_SUB_4: - case BUILT_IN_ATOMIC_FETCH_SUB_8: - case BUILT_IN_ATOMIC_FETCH_SUB_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_SUB, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_FETCH_AND_1: - case BUILT_IN_ATOMIC_FETCH_AND_2: - case BUILT_IN_ATOMIC_FETCH_AND_4: - case BUILT_IN_ATOMIC_FETCH_AND_8: - case BUILT_IN_ATOMIC_FETCH_AND_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_AND, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_FETCH_XOR_1: - case BUILT_IN_ATOMIC_FETCH_XOR_2: - case BUILT_IN_ATOMIC_FETCH_XOR_4: - case BUILT_IN_ATOMIC_FETCH_XOR_8: - case BUILT_IN_ATOMIC_FETCH_XOR_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_XOR, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_FETCH_OR_1: - case BUILT_IN_ATOMIC_FETCH_OR_2: - case 
BUILT_IN_ATOMIC_FETCH_OR_4: - case BUILT_IN_ATOMIC_FETCH_OR_8: - case BUILT_IN_ATOMIC_FETCH_OR_16: - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_OR, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_STORE_1: - case BUILT_IN_ATOMIC_STORE_2: - case BUILT_IN_ATOMIC_STORE_4: - case BUILT_IN_ATOMIC_STORE_8: - case BUILT_IN_ATOMIC_STORE_16: - /* Since there cannot be any LHS, the first parameter is meaningless. */ - gen_hsa_atomic_for_builtin (true, BRIG_ATOMIC_ST, stmt, hbb, false); - break; - break; - - case BUILT_IN_ATOMIC_ADD_FETCH_1: - case BUILT_IN_ATOMIC_ADD_FETCH_2: - case BUILT_IN_ATOMIC_ADD_FETCH_4: - case BUILT_IN_ATOMIC_ADD_FETCH_8: - case BUILT_IN_ATOMIC_ADD_FETCH_16: - gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_ADD, stmt, hbb, false); - break; - - case BUILT_IN_ATOMIC_SUB_FETCH_1: - case BUILT_IN_ATOMIC_SUB_FETCH_2: - case BUILT_IN_ATOMIC_SUB_FETCH_4: - case BUILT_IN_ATOMIC_SUB_FETCH_8: - case BUILT_IN_ATOMIC_SUB_FETCH_16: - gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_SUB, stmt, hbb, false); - break; - - case BUILT_IN_ATOMIC_AND_FETCH_1: - case BUILT_IN_ATOMIC_AND_FETCH_2: - case BUILT_IN_ATOMIC_AND_FETCH_4: - case BUILT_IN_ATOMIC_AND_FETCH_8: - case BUILT_IN_ATOMIC_AND_FETCH_16: - gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_AND, stmt, hbb, false); - break; - - case BUILT_IN_ATOMIC_XOR_FETCH_1: - case BUILT_IN_ATOMIC_XOR_FETCH_2: - case BUILT_IN_ATOMIC_XOR_FETCH_4: - case BUILT_IN_ATOMIC_XOR_FETCH_8: - case BUILT_IN_ATOMIC_XOR_FETCH_16: - gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_XOR, stmt, hbb, false); - break; - - case BUILT_IN_ATOMIC_OR_FETCH_1: - case BUILT_IN_ATOMIC_OR_FETCH_2: - case BUILT_IN_ATOMIC_OR_FETCH_4: - case BUILT_IN_ATOMIC_OR_FETCH_8: - case BUILT_IN_ATOMIC_OR_FETCH_16: - gen_hsa_atomic_for_builtin (false, BRIG_ATOMIC_OR, stmt, hbb, false); - break; - - case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_1: - case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_2: - case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_4: - case 
BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_8: - case BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_16: - { - tree type = TREE_TYPE (gimple_call_arg (stmt, 1)); - BrigType16_t atype - = hsa_bittype_for_type (hsa_type_for_scalar_tree_type (type, false)); - BrigMemoryOrder memorder = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE; - hsa_insn_basic *atominsn; - hsa_op_base *tgt; - atominsn = new hsa_insn_atomic (4, BRIG_OPCODE_ATOMIC, - BRIG_ATOMIC_CAS, atype, memorder); - tgt = get_address_from_value (gimple_call_arg (stmt, 0), hbb); - - if (lhs != NULL) - dest = hsa_cfun->reg_for_gimple_ssa (lhs); - else - dest = new hsa_op_reg (atype); - - atominsn->set_op (0, dest); - atominsn->set_op (1, tgt); - - hsa_op_with_type *op - = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 1), hbb); - atominsn->set_op (2, op); - op = hsa_reg_or_immed_for_gimple_op (gimple_call_arg (stmt, 2), hbb); - atominsn->set_op (3, op); - - hbb->append_insn (atominsn); - break; - } - - case BUILT_IN_HSA_WORKGROUPID: - query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKGROUPID, hbb); - break; - case BUILT_IN_HSA_WORKITEMID: - query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMID, hbb); - break; - case BUILT_IN_HSA_WORKITEMABSID: - query_hsa_grid_dim (stmt, BRIG_OPCODE_WORKITEMABSID, hbb); - break; - case BUILT_IN_HSA_GRIDSIZE: - query_hsa_grid_dim (stmt, BRIG_OPCODE_GRIDSIZE, hbb); - break; - case BUILT_IN_HSA_CURRENTWORKGROUPSIZE: - query_hsa_grid_dim (stmt, BRIG_OPCODE_CURRENTWORKGROUPSIZE, hbb); - break; - - case BUILT_IN_GOMP_BARRIER: - hbb->append_insn (new hsa_insn_br (0, BRIG_OPCODE_BARRIER, BRIG_TYPE_NONE, - BRIG_WIDTH_ALL)); - break; - case BUILT_IN_GOMP_PARALLEL: - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement non-gridified " - "OpenMP parallel constructs"); - break; - - case BUILT_IN_OMP_GET_THREAD_NUM: - { - query_hsa_grid_nodim (stmt, BRIG_OPCODE_WORKITEMFLATABSID, hbb); - break; - } - - case BUILT_IN_OMP_GET_NUM_THREADS: - { - gen_get_num_threads (stmt, hbb); - break; - } - case 
BUILT_IN_GOMP_TEAMS: - { - gen_set_num_threads (gimple_call_arg (stmt, 1), hbb); - break; - } - case BUILT_IN_OMP_GET_NUM_TEAMS: - { - gen_get_num_teams (stmt, hbb); - break; - } - case BUILT_IN_OMP_GET_TEAM_NUM: - { - gen_get_team_num (stmt, hbb); - break; - } - case BUILT_IN_MEMCPY: - case BUILT_IN_MEMPCPY: - { - expand_memory_copy (stmt, hbb, builtin); - break; - } - case BUILT_IN_MEMSET: - { - tree c = gimple_call_arg (stmt, 1); - - if (TREE_CODE (c) != INTEGER_CST) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - tree byte_size = gimple_call_arg (stmt, 2); - - if (!tree_fits_uhwi_p (byte_size)) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); - - if (n > HSA_MEMORY_BUILTINS_LIMIT) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - unsigned HOST_WIDE_INT constant - = tree_to_uhwi (fold_convert (unsigned_char_type_node, c)); - - expand_memory_set (stmt, n, constant, hbb, builtin); - - break; - } - case BUILT_IN_BZERO: - { - tree byte_size = gimple_call_arg (stmt, 1); - - if (!tree_fits_uhwi_p (byte_size)) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - unsigned HOST_WIDE_INT n = tree_to_uhwi (byte_size); - - if (n > HSA_MEMORY_BUILTINS_LIMIT) - { - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - - expand_memory_set (stmt, n, 0, hbb, builtin); - - break; - } - CASE_BUILT_IN_ALLOCA: - { - gen_hsa_alloca (call, hbb); - break; - } - case BUILT_IN_PREFETCH: - break; - default: - { - tree name_tree = DECL_NAME (fndecl); - const char *s = IDENTIFIER_POINTER (name_tree); - size_t len = strlen (s); - if (len > 4 && (strncmp (s, "__builtin_GOMP_", 15) == 0)) - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not implement GOMP function %s", - s); - else - gen_hsa_insns_for_direct_call (stmt, hbb); - return; - } - } -} - -/* Generate HSA instructions for a given gimple statement. Instructions will be - appended to HBB. 
*/ - -static void -gen_hsa_insns_for_gimple_stmt (gimple *stmt, hsa_bb *hbb) -{ - switch (gimple_code (stmt)) - { - case GIMPLE_ASSIGN: - if (gimple_clobber_p (stmt)) - break; - - if (gimple_assign_single_p (stmt)) - { - tree lhs = gimple_assign_lhs (stmt); - tree rhs = gimple_assign_rhs1 (stmt); - gen_hsa_insns_for_single_assignment (lhs, rhs, hbb); - } - else - gen_hsa_insns_for_operation_assignment (stmt, hbb); - break; - case GIMPLE_RETURN: - gen_hsa_insns_for_return (as_a <greturn *> (stmt), hbb); - break; - case GIMPLE_COND: - gen_hsa_insns_for_cond_stmt (stmt, hbb); - break; - case GIMPLE_CALL: - gen_hsa_insns_for_call (stmt, hbb); - break; - case GIMPLE_DEBUG: - /* ??? HSA supports some debug facilities. */ - break; - case GIMPLE_LABEL: - { - tree label = gimple_label_label (as_a <glabel *> (stmt)); - if (FORCED_LABEL (label)) - HSA_SORRY_AT (gimple_location (stmt), - "support for HSA does not implement gimple label with " - "address taken"); - - break; - } - case GIMPLE_NOP: - { - hbb->append_insn (new hsa_insn_basic (0, BRIG_OPCODE_NOP)); - break; - } - case GIMPLE_SWITCH: - { - gen_hsa_insns_for_switch_stmt (as_a <gswitch *> (stmt), hbb); - break; - } - default: - HSA_SORRY_ATV (gimple_location (stmt), - "support for HSA does not implement gimple statement %s", - gimple_code_name[(int) gimple_code (stmt)]); - } -} - -/* Generate a HSA PHI from a gimple PHI. */ - -static void -gen_hsa_phi_from_gimple_phi (gimple *phi_stmt, hsa_bb *hbb) -{ - hsa_insn_phi *hphi; - unsigned count = gimple_phi_num_args (phi_stmt); - - hsa_op_reg *dest - = hsa_cfun->reg_for_gimple_ssa (gimple_phi_result (phi_stmt)); - hphi = new hsa_insn_phi (count, dest); - hphi->m_bb = hbb->m_bb; - - auto_vec <tree, 8> aexprs; - auto_vec <hsa_op_reg *, 8> aregs; - - /* Calling split_edge when processing a PHI node messes up with the order of - gimple phi node arguments (it moves the one associated with the edge to - the end). 
We need to keep the order of edges and arguments of HSA phi - node arguments consistent, so we do all required splitting as the first - step, and in reverse order as to not be affected by the re-orderings. */ - for (unsigned j = count; j != 0; j--) - { - unsigned i = j - 1; - tree op = gimple_phi_arg_def (phi_stmt, i); - if (TREE_CODE (op) != ADDR_EXPR) - continue; - - edge e = gimple_phi_arg_edge (as_a <gphi *> (phi_stmt), i); - hsa_bb *hbb_src = hsa_init_new_bb (split_edge (e)); - hsa_op_address *addr = gen_hsa_addr (TREE_OPERAND (op, 0), - hbb_src); - - hsa_op_reg *dest - = new hsa_op_reg (hsa_get_segment_addr_type (BRIG_SEGMENT_FLAT)); - hsa_insn_basic *insn - = new hsa_insn_basic (2, BRIG_OPCODE_LDA, BRIG_TYPE_U64, - dest, addr); - hbb_src->append_insn (insn); - aexprs.safe_push (op); - aregs.safe_push (dest); - } - - tree lhs = gimple_phi_result (phi_stmt); - for (unsigned i = 0; i < count; i++) - { - tree op = gimple_phi_arg_def (phi_stmt, i); - - if (TREE_CODE (op) == SSA_NAME) - { - hsa_op_reg *hreg = hsa_cfun->reg_for_gimple_ssa (op); - hphi->set_op (i, hreg); - } - else - { - gcc_assert (is_gimple_min_invariant (op)); - tree t = TREE_TYPE (op); - if (!POINTER_TYPE_P (t) - || (TREE_CODE (op) == STRING_CST - && TREE_CODE (TREE_TYPE (t)) == INTEGER_TYPE)) - hphi->set_op (i, new hsa_op_immed (op)); - else if (POINTER_TYPE_P (TREE_TYPE (lhs)) - && TREE_CODE (op) == INTEGER_CST) - { - /* Handle assignment of NULL value to a pointer type. 
*/ - hphi->set_op (i, new hsa_op_immed (op)); - } - else if (TREE_CODE (op) == ADDR_EXPR) - { - hsa_op_reg *dest = NULL; - for (unsigned a_idx = 0; a_idx < aexprs.length (); a_idx++) - if (aexprs[a_idx] == op) - { - dest = aregs[a_idx]; - break; - } - gcc_assert (dest); - hphi->set_op (i, dest); - } - else - { - HSA_SORRY_AT (gimple_location (phi_stmt), - "support for HSA does not handle PHI nodes with " - "constant address operands"); - return; - } - } - } - - hbb->append_phi (hphi); -} - -/* Constructor of class containing HSA-specific information about a basic - block. CFG_BB is the CFG BB this HSA BB is associated with. IDX is the new - index of this BB (so that the constructor does not attempt to use - hsa_cfun during its construction). */ - -hsa_bb::hsa_bb (basic_block cfg_bb, int idx) - : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL), - m_last_phi (NULL), m_index (idx) -{ - gcc_assert (!cfg_bb->aux); - cfg_bb->aux = this; -} - -/* Constructor of class containing HSA-specific information about a basic - block. CFG_BB is the CFG BB this HSA BB is associated with. */ - -hsa_bb::hsa_bb (basic_block cfg_bb) - : m_bb (cfg_bb), m_first_insn (NULL), m_last_insn (NULL), m_first_phi (NULL), - m_last_phi (NULL), m_index (hsa_cfun->m_hbb_count++) -{ - gcc_assert (!cfg_bb->aux); - cfg_bb->aux = this; -} - -/* Create and initialize and return a new hsa_bb structure for a given CFG - basic block BB. */ - -hsa_bb * -hsa_init_new_bb (basic_block bb) -{ - void *m = obstack_alloc (&hsa_obstack, sizeof (hsa_bb)); - return new (m) hsa_bb (bb); -} - -/* Initialize OMP in an HSA basic block PROLOGUE. */ - -static void -init_prologue (void) -{ - if (!hsa_cfun->m_kern_p) - return; - - hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); - - /* Create a magic number that is going to be printed by libgomp. */ - unsigned index = hsa_get_number_decl_kernel_mappings (); - - /* Emit store to debug argument. 
*/ - if (param_hsa_gen_debug_stores > 0) - set_debug_value (prologue, new hsa_op_immed (1000 + index, BRIG_TYPE_U64)); -} - -/* Initialize hsa_num_threads to a default value. */ - -static void -init_hsa_num_threads (void) -{ - hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); - - /* Save the default value to private variable hsa_num_threads. */ - hsa_insn_basic *basic - = new hsa_insn_mem (BRIG_OPCODE_ST, hsa_num_threads->m_type, - new hsa_op_immed (0, hsa_num_threads->m_type), - new hsa_op_address (hsa_num_threads)); - prologue->append_insn (basic); -} - -/* Go over gimple representation and generate our internal HSA one. */ - -static void -gen_body_from_gimple () -{ - basic_block bb; - - /* Verify CFG for complex edges we are unable to handle. */ - edge_iterator ei; - edge e; - - FOR_EACH_BB_FN (bb, cfun) - { - FOR_EACH_EDGE (e, ei, bb->succs) - { - /* Verify all unsupported flags for edges that point - to the same basic block. */ - if (e->flags & EDGE_EH) - { - HSA_SORRY_AT (UNKNOWN_LOCATION, - "support for HSA does not implement exception " - "handling"); - return; - } - } - } - - FOR_EACH_BB_FN (bb, cfun) - { - gimple_stmt_iterator gsi; - hsa_bb *hbb = hsa_bb_for_bb (bb); - if (hbb) - continue; - - hbb = hsa_init_new_bb (bb); - - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gen_hsa_insns_for_gimple_stmt (gsi_stmt (gsi), hbb); - if (hsa_seen_error ()) - return; - } - } - - FOR_EACH_BB_FN (bb, cfun) - { - gimple_stmt_iterator gsi; - hsa_bb *hbb = hsa_bb_for_bb (bb); - gcc_assert (hbb != NULL); - - for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!virtual_operand_p (gimple_phi_result (gsi_stmt (gsi)))) - gen_hsa_phi_from_gimple_phi (gsi_stmt (gsi), hbb); - } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "------- Generated SSA form -------\n"); - dump_hsa_cfun (dump_file); - } -} - -static void -gen_function_decl_parameters (hsa_function_representation *f, - tree decl) 
-{ - tree parm; - unsigned i; - - for (parm = TYPE_ARG_TYPES (TREE_TYPE (decl)), i = 0; - parm; - parm = TREE_CHAIN (parm), i++) - { - /* Result type if last in the tree list. */ - if (TREE_CHAIN (parm) == NULL) - break; - - tree v = TREE_VALUE (parm); - - hsa_symbol *arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, - BRIG_LINKAGE_NONE); - arg->m_type = hsa_type_for_tree_type (v, &arg->m_dim); - arg->m_name_number = i; - - f->m_input_args.safe_push (arg); - } - - tree result_type = TREE_TYPE (TREE_TYPE (decl)); - if (!VOID_TYPE_P (result_type)) - { - f->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, - BRIG_LINKAGE_NONE); - f->m_output_arg->m_type - = hsa_type_for_tree_type (result_type, &f->m_output_arg->m_dim); - f->m_output_arg->m_name = "res"; - } -} - -/* Generate the vector of parameters of the HSA representation of the current - function. This also includes the output parameter representing the - result. */ - -static void -gen_function_def_parameters () -{ - tree parm; - - hsa_bb *prologue = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); - - for (parm = DECL_ARGUMENTS (cfun->decl); parm; - parm = DECL_CHAIN (parm)) - { - class hsa_symbol **slot; - - hsa_symbol *arg - = new hsa_symbol (BRIG_TYPE_NONE, hsa_cfun->m_kern_p - ? BRIG_SEGMENT_KERNARG : BRIG_SEGMENT_ARG, - BRIG_LINKAGE_FUNCTION); - arg->fillup_for_decl (parm); - - hsa_cfun->m_input_args.safe_push (arg); - - if (hsa_seen_error ()) - return; - - arg->m_name = hsa_get_declaration_name (parm); - - /* Copy all input arguments and create corresponding private symbols - for them. 
*/ - hsa_symbol *private_arg; - hsa_op_address *parm_addr = new hsa_op_address (arg); - - if (TREE_ADDRESSABLE (parm) - || (!is_gimple_reg (parm) && !TREE_READONLY (parm))) - { - private_arg = hsa_cfun->create_hsa_temporary (arg->m_type); - private_arg->fillup_for_decl (parm); - - BrigAlignment8_t align = MIN (arg->m_align, private_arg->m_align); - - hsa_op_address *private_arg_addr = new hsa_op_address (private_arg); - gen_hsa_memory_copy (prologue, private_arg_addr, parm_addr, - arg->total_byte_size (), align); - } - else - private_arg = arg; - - slot = hsa_cfun->m_local_symbols->find_slot (private_arg, INSERT); - gcc_assert (!*slot); - *slot = private_arg; - - if (is_gimple_reg (parm)) - { - tree ddef = ssa_default_def (cfun, parm); - if (ddef && !has_zero_uses (ddef)) - { - BrigType16_t t = hsa_type_for_scalar_tree_type (TREE_TYPE (ddef), - false); - BrigType16_t mtype = mem_type_for_type (t); - hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (ddef); - hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, mtype, - dest, parm_addr); - gcc_assert (!parm_addr->m_reg); - prologue->append_insn (mem); - } - } - } - - if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (cfun->decl)))) - { - class hsa_symbol **slot; - - hsa_cfun->m_output_arg = new hsa_symbol (BRIG_TYPE_NONE, BRIG_SEGMENT_ARG, - BRIG_LINKAGE_FUNCTION); - hsa_cfun->m_output_arg->fillup_for_decl (DECL_RESULT (cfun->decl)); - - if (hsa_seen_error ()) - return; - - hsa_cfun->m_output_arg->m_name = "res"; - slot = hsa_cfun->m_local_symbols->find_slot (hsa_cfun->m_output_arg, - INSERT); - gcc_assert (!*slot); - *slot = hsa_cfun->m_output_arg; - } -} - -/* Generate function representation that corresponds to - a function declaration. 
*/ - -hsa_function_representation * -hsa_generate_function_declaration (tree decl) -{ - hsa_function_representation *fun - = new hsa_function_representation (decl, false, 0); - - fun->m_declaration_p = true; - fun->m_name = get_brig_function_name (decl); - gen_function_decl_parameters (fun, decl); - - return fun; -} - - -/* Generate function representation that corresponds to - an internal FN. */ - -hsa_function_representation * -hsa_generate_internal_fn_decl (hsa_internal_fn *fn) -{ - hsa_function_representation *fun = new hsa_function_representation (fn); - - fun->m_name = fn->name (); - - for (unsigned i = 0; i < fn->get_arity (); i++) - { - hsa_symbol *arg - = new hsa_symbol (fn->get_argument_type (i), BRIG_SEGMENT_ARG, - BRIG_LINKAGE_NONE); - arg->m_name_number = i; - fun->m_input_args.safe_push (arg); - } - - fun->m_output_arg = new hsa_symbol (fn->get_argument_type (-1), - BRIG_SEGMENT_ARG, BRIG_LINKAGE_NONE); - fun->m_output_arg->m_name = "res"; - - return fun; -} - -/* Return true if switch statement S can be transformed - to a SBR instruction in HSAIL. */ - -static bool -transformable_switch_to_sbr_p (gswitch *s) -{ - /* Identify if a switch statement can be transformed to - SBR instruction, like: - - sbr_u32 $s1 [@label1, @label2, @label3]; - */ - - tree size = get_switch_size (s); - if (!tree_fits_uhwi_p (size)) - return false; - - if (tree_to_uhwi (size) > HSA_MAXIMUM_SBR_LABELS) - return false; - - return true; -} - -/* Structure hold connection between PHI nodes and immediate - values hold by there nodes. */ - -class phi_definition -{ -public: - phi_definition (unsigned phi_i, unsigned label_i, tree imm): - phi_index (phi_i), label_index (label_i), phi_value (imm) - {} - - unsigned phi_index; - unsigned label_index; - tree phi_value; -}; - -/* Sum slice of a vector V, starting from index START and ending - at the index END - 1. 
*/ - -template <typename T> -static -T sum_slice (const auto_vec <T> &v, unsigned start, unsigned end, - T zero) -{ - T s = zero; - - for (unsigned i = start; i < end; i++) - s += v[i]; - - return s; -} - -/* Function transforms GIMPLE SWITCH statements to a series of IF statements. - Let's assume following example: - -L0: - switch (index) - case C1: -L1: hard_work_1 (); - break; - case C2..C3: -L2: hard_work_2 (); - break; - default: -LD: hard_work_3 (); - break; - - The transformation encompasses following steps: - 1) all immediate values used by edges coming from the switch basic block - are saved - 2) all these edges are removed - 3) the switch statement (in L0) is replaced by: - if (index == C1) - goto L1; - else - goto L1'; - - 4) newly created basic block Lx' is used for generation of - a next condition - 5) else branch of the last condition goes to LD - 6) fix all immediate values in PHI nodes that were propagated though - edges that were removed in step 2 - - Note: if a case is made by a range C1..C2, then process - following transformation: - - switch_cond_op1 = C1 <= index; - switch_cond_op2 = index <= C2; - switch_cond_and = switch_cond_op1 & switch_cond_op2; - if (switch_cond_and != 0) - goto Lx; - else - goto Ly; - -*/ - -static bool -convert_switch_statements (void) -{ - basic_block bb; - - bool modified_cfg = false; - - FOR_EACH_BB_FN (bb, cfun) - { - gimple_stmt_iterator gsi = gsi_last_bb (bb); - if (gsi_end_p (gsi)) - continue; - - gimple *stmt = gsi_stmt (gsi); - - if (gimple_code (stmt) == GIMPLE_SWITCH) - { - gswitch *s = as_a <gswitch *> (stmt); - - /* If the switch can utilize SBR insn, skip the statement. 
*/ - if (transformable_switch_to_sbr_p (s)) - continue; - - modified_cfg = true; - - unsigned labels = gimple_switch_num_labels (s); - tree index = gimple_switch_index (s); - tree index_type = TREE_TYPE (index); - tree default_label = gimple_switch_default_label (s); - basic_block default_label_bb - = label_to_block (cfun, CASE_LABEL (default_label)); - basic_block cur_bb = bb; - - auto_vec <edge> new_edges; - auto_vec <phi_definition *> phi_todo_list; - auto_vec <profile_count> edge_counts; - auto_vec <profile_probability> edge_probabilities; - - /* Investigate all labels that and PHI nodes in these edges which - should be fixed after we add new collection of edges. */ - for (unsigned i = 0; i < labels; i++) - { - basic_block label_bb = gimple_switch_label_bb (cfun, s, i); - edge e = find_edge (bb, label_bb); - edge_counts.safe_push (e->count ()); - edge_probabilities.safe_push (e->probability); - gphi_iterator phi_gsi; - - /* Save PHI definitions that will be destroyed because of an edge - is going to be removed. */ - unsigned phi_index = 0; - for (phi_gsi = gsi_start_phis (e->dest); - !gsi_end_p (phi_gsi); gsi_next (&phi_gsi)) - { - gphi *phi = phi_gsi.phi (); - for (unsigned j = 0; j < gimple_phi_num_args (phi); j++) - { - if (gimple_phi_arg_edge (phi, j) == e) - { - tree imm = gimple_phi_arg_def (phi, j); - phi_definition *p = new phi_definition (phi_index, i, - imm); - phi_todo_list.safe_push (p); - break; - } - } - phi_index++; - } - } - - /* Remove all edges for the current basic block. */ - for (int i = EDGE_COUNT (bb->succs) - 1; i >= 0; i--) - { - edge e = EDGE_SUCC (bb, i); - remove_edge (e); - } - - /* Iterate all non-default labels. 
*/ - for (unsigned i = 1; i < labels; i++) - { - tree label = gimple_switch_label (s, i); - tree low = CASE_LOW (label); - tree high = CASE_HIGH (label); - - if (!useless_type_conversion_p (TREE_TYPE (low), index_type)) - low = fold_convert (index_type, low); - - gimple_stmt_iterator cond_gsi = gsi_last_bb (cur_bb); - gimple *c = NULL; - if (high) - { - tree tmp1 = make_temp_ssa_name (boolean_type_node, NULL, - "switch_cond_op1"); - - gimple *assign1 = gimple_build_assign (tmp1, LE_EXPR, low, - index); - - tree tmp2 = make_temp_ssa_name (boolean_type_node, NULL, - "switch_cond_op2"); - - if (!useless_type_conversion_p (TREE_TYPE (high), index_type)) - high = fold_convert (index_type, high); - gimple *assign2 = gimple_build_assign (tmp2, LE_EXPR, index, - high); - - tree tmp3 = make_temp_ssa_name (boolean_type_node, NULL, - "switch_cond_and"); - gimple *assign3 = gimple_build_assign (tmp3, BIT_AND_EXPR, tmp1, - tmp2); - - gsi_insert_before (&cond_gsi, assign1, GSI_SAME_STMT); - gsi_insert_before (&cond_gsi, assign2, GSI_SAME_STMT); - gsi_insert_before (&cond_gsi, assign3, GSI_SAME_STMT); - - tree b = constant_boolean_node (false, boolean_type_node); - c = gimple_build_cond (NE_EXPR, tmp3, b, NULL, NULL); - } - else - c = gimple_build_cond (EQ_EXPR, index, low, NULL, NULL); - - gimple_set_location (c, gimple_location (stmt)); - - gsi_insert_before (&cond_gsi, c, GSI_SAME_STMT); - - basic_block label_bb = label_to_block (cfun, CASE_LABEL (label)); - edge new_edge = make_edge (cur_bb, label_bb, EDGE_TRUE_VALUE); - profile_probability prob_sum = sum_slice <profile_probability> - (edge_probabilities, i, labels, profile_probability::never ()) - + edge_probabilities[0]; - - if (prob_sum.initialized_p ()) - new_edge->probability = edge_probabilities[i] / prob_sum; - - new_edges.safe_push (new_edge); - - if (i < labels - 1) - { - /* Prepare another basic block that will contain - next condition. 
*/ - basic_block next_bb = create_empty_bb (cur_bb); - if (current_loops) - { - add_bb_to_loop (next_bb, cur_bb->loop_father); - loops_state_set (LOOPS_NEED_FIXUP); - } - - edge next_edge = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); - next_edge->probability = new_edge->probability.invert (); - next_bb->count = next_edge->count (); - cur_bb = next_bb; - } - else /* Link last IF statement and default label - of the switch. */ - { - edge e = make_edge (cur_bb, default_label_bb, EDGE_FALSE_VALUE); - e->probability = new_edge->probability.invert (); - new_edges.safe_insert (0, e); - } - } - - /* Restore original PHI immediate value. */ - for (unsigned i = 0; i < phi_todo_list.length (); i++) - { - phi_definition *phi_def = phi_todo_list[i]; - edge new_edge = new_edges[phi_def->label_index]; - - gphi_iterator it = gsi_start_phis (new_edge->dest); - for (unsigned i = 0; i < phi_def->phi_index; i++) - gsi_next (&it); - - gphi *phi = it.phi (); - add_phi_arg (phi, phi_def->phi_value, new_edge, UNKNOWN_LOCATION); - delete phi_def; - } - - /* Remove the original GIMPLE switch statement. */ - gsi_remove (&gsi, true); - } - } - - if (dump_file) - dump_function_to_file (current_function_decl, dump_file, TDF_DETAILS); - - return modified_cfg; -} - -/* Expand builtins that can't be handled by HSA back-end. 
*/ - -static void -expand_builtins () -{ - basic_block bb; - - FOR_EACH_BB_FN (bb, cfun) - { - for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); - gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (gimple_code (stmt) != GIMPLE_CALL) - continue; - - gcall *call = as_a <gcall *> (stmt); - - if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL)) - continue; - - tree fndecl = gimple_call_fndecl (stmt); - enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); - switch (fn) - { - case BUILT_IN_CEXPF: - case BUILT_IN_CEXPIF: - case BUILT_IN_CEXPI: - { - /* Similar to builtins.c (expand_builtin_cexpi), the builtin - can be transformed to: cexp(I * z) = ccos(z) + I * csin(z). */ - tree lhs = gimple_call_lhs (stmt); - tree rhs = gimple_call_arg (stmt, 0); - tree rhs_type = TREE_TYPE (rhs); - bool float_type_p = rhs_type == float_type_node; - tree real_part = make_temp_ssa_name (rhs_type, NULL, - "cexp_real_part"); - tree imag_part = make_temp_ssa_name (rhs_type, NULL, - "cexp_imag_part"); - - tree cos_fndecl - = mathfn_built_in (rhs_type, fn == float_type_p - ? BUILT_IN_COSF : BUILT_IN_COS); - gcall *cos = gimple_build_call (cos_fndecl, 1, rhs); - gimple_call_set_lhs (cos, real_part); - gsi_insert_before (&gsi, cos, GSI_SAME_STMT); - - tree sin_fndecl - = mathfn_built_in (rhs_type, fn == float_type_p - ? BUILT_IN_SINF : BUILT_IN_SIN); - gcall *sin = gimple_build_call (sin_fndecl, 1, rhs); - gimple_call_set_lhs (sin, imag_part); - gsi_insert_before (&gsi, sin, GSI_SAME_STMT); - - - gassign *assign = gimple_build_assign (lhs, COMPLEX_EXPR, - real_part, imag_part); - gsi_insert_before (&gsi, assign, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - break; - } - default: - break; - } - } - } -} - -/* Emit HSA module variables that are global for the entire module. 
*/ - -static void -emit_hsa_module_variables (void) -{ - hsa_num_threads = new hsa_symbol (BRIG_TYPE_U32, BRIG_SEGMENT_PRIVATE, - BRIG_LINKAGE_MODULE, true); - - hsa_num_threads->m_name = "hsa_num_threads"; - - hsa_brig_emit_omp_symbols (); -} - -/* Generate HSAIL representation of the current function and write into a - special section of the output file. If KERNEL is set, the function will be - considered an HSA kernel callable from the host, otherwise it will be - compiled as an HSA function callable from other HSA code. */ - -static void -generate_hsa (bool kernel) -{ - hsa_init_data_for_cfun (); - - if (hsa_num_threads == NULL) - emit_hsa_module_variables (); - - bool modified_cfg = convert_switch_statements (); - /* Initialize hsa_cfun. */ - hsa_cfun = new hsa_function_representation (cfun->decl, kernel, - SSANAMES (cfun)->length (), - modified_cfg); - hsa_cfun->init_extra_bbs (); - - if (flag_tm) - { - HSA_SORRY_AT (UNKNOWN_LOCATION, - "support for HSA does not implement transactional memory"); - goto fail; - } - - verify_function_arguments (cfun->decl); - if (hsa_seen_error ()) - goto fail; - - hsa_cfun->m_name = get_brig_function_name (cfun->decl); - - gen_function_def_parameters (); - if (hsa_seen_error ()) - goto fail; - - init_prologue (); - - gen_body_from_gimple (); - if (hsa_seen_error ()) - goto fail; - - if (hsa_cfun->m_kernel_dispatch_count) - init_hsa_num_threads (); - - if (hsa_cfun->m_kern_p) - { - hsa_function_summary *s - = hsa_summaries->get_create (cgraph_node::get (hsa_cfun->m_decl)); - hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name, - hsa_cfun->m_maximum_omp_data_size, - s->m_gridified_kernel_p); - } - - if (flag_checking) - { - for (unsigned i = 0; i < hsa_cfun->m_ssa_map.length (); i++) - if (hsa_cfun->m_ssa_map[i]) - hsa_cfun->m_ssa_map[i]->verify_ssa (); - - basic_block bb; - FOR_EACH_BB_FN (bb, cfun) - { - hsa_bb *hbb = hsa_bb_for_bb (bb); - - for (hsa_insn_basic *insn = hbb->m_first_insn; insn; - insn = 
insn->m_next) - insn->verify (); - } - } - - hsa_regalloc (); - hsa_brig_emit_function (); - - fail: - hsa_deinit_data_for_cfun (); -} - -namespace { - -const pass_data pass_data_gen_hsail = -{ - GIMPLE_PASS, - "hsagen", /* name */ - OPTGROUP_OMP, /* optinfo_flags */ - TV_NONE, /* tv_id */ - PROP_cfg | PROP_ssa, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0 /* todo_flags_finish */ -}; - -class pass_gen_hsail : public gimple_opt_pass -{ -public: - pass_gen_hsail (gcc::context *ctxt) - : gimple_opt_pass(pass_data_gen_hsail, ctxt) - {} - - /* opt_pass methods: */ - bool gate (function *); - unsigned int execute (function *); - -}; // class pass_gen_hsail - -/* Determine whether or not to run generation of HSAIL. */ - -bool -pass_gen_hsail::gate (function *f) -{ - return hsa_gen_requested_p () - && hsa_gpu_implementation_p (f->decl); -} - -unsigned int -pass_gen_hsail::execute (function *) -{ - cgraph_node *node = cgraph_node::get_create (current_function_decl); - hsa_function_summary *s = hsa_summaries->get_create (node); - - expand_builtins (); - generate_hsa (s->m_kind == HSA_KERNEL); - TREE_ASM_WRITTEN (current_function_decl) = 1; - return TODO_discard_function; -} - -} // anon namespace - -/* Create the instance of hsa gen pass. */ - -gimple_opt_pass * -make_pass_gen_hsail (gcc::context *ctxt) -{ - return new pass_gen_hsail (ctxt); -} diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c deleted file mode 100644 index 7614efe..0000000 --- a/gcc/hsa-regalloc.c +++ /dev/null @@ -1,729 +0,0 @@ -/* HSAIL IL Register allocation and out-of-SSA. - Copyright (C) 2013-2020 Free Software Foundation, Inc. - Contributed by Michael Matz <matz@suse.de> - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. 
- -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "is-a.h" -#include "vec.h" -#include "tree.h" -#include "dominance.h" -#include "basic-block.h" -#include "function.h" -#include "cfganal.h" -#include "cfg.h" -#include "bitmap.h" -#include "dumpfile.h" -#include "cgraph.h" -#include "print-tree.h" -#include "cfghooks.h" -#include "alloc-pool.h" -#include "symbol-summary.h" -#include "hsa-common.h" - - -/* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */ - -static void -naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors) -{ - unsigned count = phi->operand_count (); - for (unsigned i = 0; i < count; i++) - { - gcc_checking_assert (phi->get_op (i)); - hsa_op_base *op = phi->get_op (i); - hsa_bb *hbb; - edge e; - - if (!op) - break; - - e = predecessors[i]; - if (single_succ_p (e->src)) - hbb = hsa_bb_for_bb (e->src); - else - { - basic_block old_dest = e->dest; - hbb = hsa_init_new_bb (split_edge (e)); - - /* If switch insn used this edge, fix jump table. */ - hsa_bb *source = hsa_bb_for_bb (e->src); - hsa_insn_sbr *sbr; - if (source->m_last_insn - && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn))) - sbr->replace_all_labels (old_dest, hbb->m_bb); - } - - hsa_build_append_simple_mov (phi->m_dest, op, hbb); - } -} - -/* Naive out-of SSA. 
*/ - -static void -naive_outof_ssa (void) -{ - basic_block bb; - - hsa_cfun->m_in_ssa = false; - - FOR_ALL_BB_FN (bb, cfun) - { - hsa_bb *hbb = hsa_bb_for_bb (bb); - hsa_insn_phi *phi; - - /* naive_process_phi can call split_edge on an incoming edge which order if - the incoming edges to the basic block and thus make it inconsistent with - the ordering of PHI arguments, so we collect them in advance. */ - auto_vec<edge, 8> predecessors; - unsigned pred_count = EDGE_COUNT (bb->preds); - for (unsigned i = 0; i < pred_count; i++) - predecessors.safe_push (EDGE_PRED (bb, i)); - - for (phi = hbb->m_first_phi; - phi; - phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL) - naive_process_phi (phi, predecessors); - - /* Zap PHI nodes, they will be deallocated when everything else will. */ - hbb->m_first_phi = NULL; - hbb->m_last_phi = NULL; - } -} - -/* Return register class number for the given HSA TYPE. 0 means the 'c' one - bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class - and 3 for 'q' 128 bit class. 
*/ - -static int -m_reg_class_for_type (BrigType16_t type) -{ - switch (type) - { - case BRIG_TYPE_B1: - return 0; - - case BRIG_TYPE_U8: - case BRIG_TYPE_U16: - case BRIG_TYPE_U32: - case BRIG_TYPE_S8: - case BRIG_TYPE_S16: - case BRIG_TYPE_S32: - case BRIG_TYPE_F16: - case BRIG_TYPE_F32: - case BRIG_TYPE_B8: - case BRIG_TYPE_B16: - case BRIG_TYPE_B32: - case BRIG_TYPE_U8X4: - case BRIG_TYPE_S8X4: - case BRIG_TYPE_U16X2: - case BRIG_TYPE_S16X2: - case BRIG_TYPE_F16X2: - return 1; - - case BRIG_TYPE_U64: - case BRIG_TYPE_S64: - case BRIG_TYPE_F64: - case BRIG_TYPE_B64: - case BRIG_TYPE_U8X8: - case BRIG_TYPE_S8X8: - case BRIG_TYPE_U16X4: - case BRIG_TYPE_S16X4: - case BRIG_TYPE_F16X4: - case BRIG_TYPE_U32X2: - case BRIG_TYPE_S32X2: - case BRIG_TYPE_F32X2: - return 2; - - case BRIG_TYPE_B128: - case BRIG_TYPE_U8X16: - case BRIG_TYPE_S8X16: - case BRIG_TYPE_U16X8: - case BRIG_TYPE_S16X8: - case BRIG_TYPE_F16X8: - case BRIG_TYPE_U32X4: - case BRIG_TYPE_U64X2: - case BRIG_TYPE_S32X4: - case BRIG_TYPE_S64X2: - case BRIG_TYPE_F32X4: - case BRIG_TYPE_F64X2: - return 3; - - default: - gcc_unreachable (); - } -} - -/* If the Ith operands of INSN is or contains a register (in an address), - return the address of that register operand. If not return NULL. */ - -static hsa_op_reg ** -insn_reg_addr (hsa_insn_basic *insn, int i) -{ - hsa_op_base *op = insn->get_op (i); - if (!op) - return NULL; - hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op); - if (reg) - return (hsa_op_reg **) insn->get_op_addr (i); - hsa_op_address *addr = dyn_cast <hsa_op_address *> (op); - if (addr && addr->m_reg) - return &addr->m_reg; - return NULL; -} - -struct m_reg_class_desc -{ - unsigned next_avail, max_num; - unsigned used_num, max_used; - uint64_t used[2]; - char cl_char; -}; - -/* Rewrite the instructions in BB to observe spilled live ranges. - CLASSES is the global register class state. 
*/ - -static void -rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes) -{ - hsa_bb *hbb = hsa_bb_for_bb (bb); - hsa_insn_basic *insn, *next_insn; - - for (insn = hbb->m_first_insn; insn; insn = next_insn) - { - next_insn = insn->m_next; - unsigned count = insn->operand_count (); - for (unsigned i = 0; i < count; i++) - { - gcc_checking_assert (insn->get_op (i)); - hsa_op_reg **regaddr = insn_reg_addr (insn, i); - - if (regaddr) - { - hsa_op_reg *reg = *regaddr; - if (reg->m_reg_class) - continue; - gcc_assert (reg->m_spill_sym); - - int cl = m_reg_class_for_type (reg->m_type); - hsa_op_reg *tmp, *tmp2; - if (insn->op_output_p (i)) - tmp = hsa_spill_out (insn, reg, &tmp2); - else - tmp = hsa_spill_in (insn, reg, &tmp2); - - *regaddr = tmp; - - tmp->m_reg_class = classes[cl].cl_char; - tmp->m_hard_num = (char) (classes[cl].max_num + i); - if (tmp2) - { - gcc_assert (cl == 0); - tmp2->m_reg_class = classes[1].cl_char; - tmp2->m_hard_num = (char) (classes[1].max_num + i); - } - } - } - } -} - -/* Dump current function to dump file F, with info specific - to register allocation. */ - -void -dump_hsa_cfun_regalloc (FILE *f) -{ - basic_block bb; - - fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name); - - FOR_ALL_BB_FN (bb, cfun) - { - hsa_bb *hbb = (class hsa_bb *) bb->aux; - bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n"); - dump_hsa_bb (f, hbb); - bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n"); - } -} - -/* Given the global register allocation state CLASSES and a - register REG, try to give it a hardware register. If successful, - store that hardreg in REG and return it, otherwise return -1. - Also changes CLASSES to accommodate for the allocated register. 
*/ - -static int -try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) -{ - int cl = m_reg_class_for_type (reg->m_type); - int ret = -1; - if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4 - >= 128 - 5) - return -1; - if (classes[cl].used_num < classes[cl].max_num) - { - unsigned int i; - classes[cl].used_num++; - if (classes[cl].used_num > classes[cl].max_used) - classes[cl].max_used = classes[cl].used_num; - for (i = 0; i < classes[cl].used_num; i++) - if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63)))) - break; - ret = i; - classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63)); - reg->m_reg_class = classes[cl].cl_char; - reg->m_hard_num = i; - } - return ret; -} - -/* Free up hardregs used by REG, into allocation state CLASSES. */ - -static void -free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg) -{ - int cl = m_reg_class_for_type (reg->m_type); - int ret = reg->m_hard_num; - gcc_assert (reg->m_reg_class == classes[cl].cl_char); - classes[cl].used_num--; - classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63)); -} - -/* Note that the live range for REG ends at least at END. */ - -static void -note_lr_end (hsa_op_reg *reg, int end) -{ - if (reg->m_lr_end < end) - reg->m_lr_end = end; -} - -/* Note that the live range for REG starts at least at BEGIN. */ - -static void -note_lr_begin (hsa_op_reg *reg, int begin) -{ - if (reg->m_lr_begin > begin) - reg->m_lr_begin = begin; -} - -/* Given two registers A and B, return -1, 0 or 1 if A's live range - starts before, at or after B's live range. 
*/ - -static int -cmp_begin (const void *a, const void *b) -{ - const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a; - const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b; - int ret; - if (rega == regb) - return 0; - ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin; - if (ret) - return ret; - return ((*rega)->m_order - (*regb)->m_order); -} - -/* Given two registers REGA and REGB, return true if REGA's - live range ends after REGB's. This results in a sorting order - with earlier end points at the end. */ - -static bool -cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b) -{ - int ret; - if (rega == regb) - return false; - ret = (regb)->m_lr_end - (rega)->m_lr_end; - if (ret) - return ret < 0; - return (((regb)->m_order - (rega)->m_order)) < 0; -} - -/* Expire all old intervals in ACTIVE (a per-regclass vector), - that is, those that end before the interval REG starts. Give - back resources freed so into the state CLASSES. */ - -static void -expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active, - struct m_reg_class_desc *classes) -{ - for (int i = 0; i < 4; i++) - while (!active[i].is_empty ()) - { - hsa_op_reg *a = active[i].pop (); - if (a->m_lr_end > reg->m_lr_begin) - { - active[i].quick_push (a); - break; - } - free_reg (classes, a); - } -} - -/* The interval REG didn't get a hardreg. Spill it or one of those - from ACTIVE (if the latter, then REG will become allocated to the - hardreg that formerly was used by it). 
*/ - -static void -spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active) -{ - int cl = m_reg_class_for_type (reg->m_type); - gcc_assert (!active[cl].is_empty ()); - hsa_op_reg *cand = active[cl][0]; - if (cand->m_lr_end > reg->m_lr_end) - { - reg->m_reg_class = cand->m_reg_class; - reg->m_hard_num = cand->m_hard_num; - active[cl].ordered_remove (0); - unsigned place = active[cl].lower_bound (reg, cmp_end); - active[cl].quick_insert (place, reg); - } - else - cand = reg; - - gcc_assert (!cand->m_spill_sym); - BrigType16_t type = cand->m_type; - if (type == BRIG_TYPE_B1) - type = BRIG_TYPE_U8; - cand->m_reg_class = 0; - cand->m_spill_sym = hsa_get_spill_symbol (type); - cand->m_spill_sym->m_name_number = cand->m_order; -} - -/* Given the global register state CLASSES allocate all HSA virtual - registers either to hardregs or to a spill symbol. */ - -static void -linear_scan_regalloc (struct m_reg_class_desc *classes) -{ - /* Compute liveness. */ - bool changed; - int i, n; - int insn_order; - int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); - bitmap work = BITMAP_ALLOC (NULL); - vec<hsa_op_reg*> ind2reg = vNULL; - vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL}; - hsa_insn_basic *m_last_insn; - - /* We will need the reverse post order for linearization, - and the post order for liveness analysis, which is the same - backward. */ - n = pre_and_rev_post_order_compute (NULL, bbs, true); - ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count); - - /* Give all instructions a linearized number, at the same time - build a mapping from register index to register. 
*/ - insn_order = 1; - for (i = 0; i < n; i++) - { - basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); - hsa_bb *hbb = hsa_bb_for_bb (bb); - hsa_insn_basic *insn; - for (insn = hbb->m_first_insn; insn; insn = insn->m_next) - { - unsigned opi; - insn->m_number = insn_order++; - for (opi = 0; opi < insn->operand_count (); opi++) - { - gcc_checking_assert (insn->get_op (opi)); - hsa_op_reg **regaddr = insn_reg_addr (insn, opi); - if (regaddr) - ind2reg[(*regaddr)->m_order] = *regaddr; - } - } - } - - /* Initialize all live ranges to [after-end, 0). */ - for (i = 0; i < hsa_cfun->m_reg_count; i++) - if (ind2reg[i]) - ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0; - - /* Classic liveness analysis, as long as something changes: - m_liveout is union (m_livein of successors) - m_livein is m_liveout minus defs plus uses. */ - do - { - changed = false; - for (i = n - 1; i >= 0; i--) - { - edge e; - edge_iterator ei; - basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); - hsa_bb *hbb = hsa_bb_for_bb (bb); - - /* Union of successors m_livein (or empty if none). */ - bool first = true; - FOR_EACH_EDGE (e, ei, bb->succs) - if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)) - { - hsa_bb *succ = hsa_bb_for_bb (e->dest); - if (first) - { - bitmap_copy (work, succ->m_livein); - first = false; - } - else - bitmap_ior_into (work, succ->m_livein); - } - if (first) - bitmap_clear (work); - - bitmap_copy (hbb->m_liveout, work); - - /* Remove defs, include uses in a backward insn walk. 
*/ - hsa_insn_basic *insn; - for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) - { - unsigned opi; - unsigned ndefs = insn->input_count (); - for (opi = 0; opi < ndefs && insn->get_op (opi); opi++) - { - gcc_checking_assert (insn->get_op (opi)); - hsa_op_reg **regaddr = insn_reg_addr (insn, opi); - if (regaddr) - bitmap_clear_bit (work, (*regaddr)->m_order); - } - for (; opi < insn->operand_count (); opi++) - { - gcc_checking_assert (insn->get_op (opi)); - hsa_op_reg **regaddr = insn_reg_addr (insn, opi); - if (regaddr) - bitmap_set_bit (work, (*regaddr)->m_order); - } - } - - /* Note if that changed something. */ - if (bitmap_ior_into (hbb->m_livein, work)) - changed = true; - } - } - while (changed); - - /* Make one pass through all instructions in linear order, - noting and merging possible live range start and end points. */ - m_last_insn = NULL; - for (i = n - 1; i >= 0; i--) - { - basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]); - hsa_bb *hbb = hsa_bb_for_bb (bb); - hsa_insn_basic *insn; - int after_end_number; - unsigned bit; - bitmap_iterator bi; - - if (m_last_insn) - after_end_number = m_last_insn->m_number; - else - after_end_number = insn_order; - /* Everything live-out in this BB has at least an end point - after us. */ - EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi) - note_lr_end (ind2reg[bit], after_end_number); - - for (insn = hbb->m_last_insn; insn; insn = insn->m_prev) - { - unsigned opi; - unsigned ndefs = insn->input_count (); - for (opi = 0; opi < insn->operand_count (); opi++) - { - gcc_checking_assert (insn->get_op (opi)); - hsa_op_reg **regaddr = insn_reg_addr (insn, opi); - if (regaddr) - { - hsa_op_reg *reg = *regaddr; - if (opi < ndefs) - note_lr_begin (reg, insn->m_number); - else - note_lr_end (reg, insn->m_number); - } - } - } - - /* Everything live-in in this BB has a start point before - our first insn. 
*/ - int before_start_number; - if (hbb->m_first_insn) - before_start_number = hbb->m_first_insn->m_number; - else - before_start_number = after_end_number; - before_start_number--; - EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi) - note_lr_begin (ind2reg[bit], before_start_number); - - if (hbb->m_first_insn) - m_last_insn = hbb->m_first_insn; - } - - for (i = 0; i < hsa_cfun->m_reg_count; i++) - if (ind2reg[i]) - { - /* All regs that have still their start at after all code actually - are defined at the start of the routine (prologue). */ - if (ind2reg[i]->m_lr_begin == insn_order) - ind2reg[i]->m_lr_begin = 0; - /* All regs that have no use but a def will have lr_end == 0, - they are actually live from def until after the insn they are - defined in. */ - if (ind2reg[i]->m_lr_end == 0) - ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1; - } - - /* Sort all intervals by increasing start point. */ - gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count); - - if (flag_checking) - for (unsigned i = 0; i < ind2reg.length (); i++) - gcc_assert (ind2reg[i]); - - ind2reg.qsort (cmp_begin); - for (i = 0; i < 4; i++) - active[i].reserve_exact (hsa_cfun->m_reg_count); - - /* Now comes the linear scan allocation. */ - for (i = 0; i < hsa_cfun->m_reg_count; i++) - { - hsa_op_reg *reg = ind2reg[i]; - if (!reg) - continue; - expire_old_intervals (reg, active, classes); - int cl = m_reg_class_for_type (reg->m_type); - if (try_alloc_reg (classes, reg) >= 0) - { - unsigned place = active[cl].lower_bound (reg, cmp_end); - active[cl].quick_insert (place, reg); - } - else - spill_at_interval (reg, active); - - /* Some interesting dumping as we go. 
*/ - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, " reg%d: [%5d, %5d)->", - reg->m_order, reg->m_lr_begin, reg->m_lr_end); - if (reg->m_reg_class) - fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num); - else - fprintf (dump_file, "[%%__%s_%i]", - hsa_seg_name (reg->m_spill_sym->m_segment), - reg->m_spill_sym->m_name_number); - for (int cl = 0; cl < 4; cl++) - { - bool first = true; - hsa_op_reg *r; - fprintf (dump_file, " {"); - for (int j = 0; active[cl].iterate (j, &r); j++) - if (first) - { - fprintf (dump_file, "%d", r->m_order); - first = false; - } - else - fprintf (dump_file, ", %d", r->m_order); - fprintf (dump_file, "}"); - } - fprintf (dump_file, "\n"); - } - } - - BITMAP_FREE (work); - free (bbs); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "------- After liveness: -------\n"); - dump_hsa_cfun_regalloc (dump_file); - fprintf (dump_file, " ----- Intervals:\n"); - for (i = 0; i < hsa_cfun->m_reg_count; i++) - { - hsa_op_reg *reg = ind2reg[i]; - if (!reg) - continue; - fprintf (dump_file, " reg%d: [%5d, %5d)->", reg->m_order, - reg->m_lr_begin, reg->m_lr_end); - if (reg->m_reg_class) - fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num); - else - fprintf (dump_file, "[%%__%s_%i]\n", - hsa_seg_name (reg->m_spill_sym->m_segment), - reg->m_spill_sym->m_name_number); - } - } - - for (i = 0; i < 4; i++) - active[i].release (); - ind2reg.release (); -} - -/* Entry point for register allocation. */ - -static void -regalloc (void) -{ - basic_block bb; - m_reg_class_desc classes[4]; - - /* If there are no registers used in the function, exit right away. 
*/ - if (hsa_cfun->m_reg_count == 0) - return; - - memset (classes, 0, sizeof (classes)); - classes[0].next_avail = 0; - classes[0].max_num = 7; - classes[0].cl_char = 'c'; - classes[1].cl_char = 's'; - classes[2].cl_char = 'd'; - classes[3].cl_char = 'q'; - - for (int i = 1; i < 4; i++) - { - classes[i].next_avail = 0; - classes[i].max_num = 20; - } - - linear_scan_regalloc (classes); - - FOR_ALL_BB_FN (bb, cfun) - rewrite_code_bb (bb, classes); -} - -/* Out of SSA and register allocation on HSAIL IL. */ - -void -hsa_regalloc (void) -{ - hsa_cfun->update_dominance (); - naive_outof_ssa (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "------- After out-of-SSA: -------\n"); - dump_hsa_cfun (dump_file); - } - - regalloc (); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "------- After register allocation: -------\n"); - dump_hsa_cfun (dump_file); - } -} diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c deleted file mode 100644 index f2980ba..0000000 --- a/gcc/ipa-hsa.c +++ /dev/null @@ -1,336 +0,0 @@ -/* Callgraph based analysis of static variables. - Copyright (C) 2015-2020 Free Software Foundation, Inc. - Contributed by Martin Liska <mliska@suse.cz> - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -/* Interprocedural HSA pass is responsible for creation of HSA clones. 
- For all these HSA clones, we emit HSAIL instructions and pass processing - is terminated. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "tm.h" -#include "is-a.h" -#include "hash-set.h" -#include "vec.h" -#include "tree.h" -#include "tree-pass.h" -#include "function.h" -#include "basic-block.h" -#include "gimple.h" -#include "dumpfile.h" -#include "gimple-pretty-print.h" -#include "tree-streamer.h" -#include "stringpool.h" -#include "cgraph.h" -#include "print-tree.h" -#include "alloc-pool.h" -#include "symbol-summary.h" -#include "hsa-common.h" - -namespace { - -/* If NODE is not versionable, warn about not emiting HSAIL and return false. - Otherwise return true. */ - -static bool -check_warn_node_versionable (cgraph_node *node) -{ - if (!node->versionable) - { - warning_at (EXPR_LOCATION (node->decl), OPT_Whsa, - "could not emit HSAIL for function %s: function cannot be " - "cloned", node->dump_name ()); - return false; - } - return true; -} - -/* The function creates HSA clones for all functions that were either - marked as HSA kernels or are callable HSA functions. Apart from that, - we redirect all edges that come from an HSA clone and end in another - HSA clone to connect these two functions. */ - -static unsigned int -process_hsa_functions (void) -{ - struct cgraph_node *node; - - if (hsa_summaries == NULL) - hsa_summaries = new hsa_summary_t (symtab); - - FOR_EACH_DEFINED_FUNCTION (node) - { - hsa_function_summary *s = hsa_summaries->get (node); - - /* A linked function is skipped. 
*/ - if (s != NULL && s->m_bound_function != NULL) - continue; - - if (s != NULL) - { - if (!check_warn_node_versionable (node)) - continue; - cgraph_node *clone - = node->create_virtual_clone (vec <cgraph_edge *> (), - NULL, NULL, "hsa", 0); - TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); - clone->externally_visible = node->externally_visible; - - clone->force_output = true; - hsa_summaries->link_functions (clone, node, s->m_kind, false); - - if (dump_file) - fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n", - clone->dump_name (), - s->m_kind == HSA_KERNEL ? "kernel" : "function"); - } - else if (hsa_callable_function_p (node->decl) - /* At this point, this is enough to identify clones for - parallel, which for HSA would need to be kernels anyway. */ - && !DECL_ARTIFICIAL (node->decl)) - { - if (!check_warn_node_versionable (node)) - continue; - cgraph_node *clone - = node->create_virtual_clone (vec <cgraph_edge *> (), - NULL, NULL, "hsa", 0); - TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); - clone->externally_visible = node->externally_visible; - - if (!node->local) - clone->force_output = true; - hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false); - - if (dump_file) - fprintf (dump_file, "Created a new HSA function clone: %s\n", - clone->dump_name ()); - } - } - - /* Redirect all edges that are between HSA clones. 
*/ - FOR_EACH_DEFINED_FUNCTION (node) - { - cgraph_edge *e = node->callees; - - while (e) - { - hsa_function_summary *src = hsa_summaries->get (node); - if (src != NULL && src->m_gpu_implementation_p) - { - hsa_function_summary *dst = hsa_summaries->get (e->callee); - if (dst != NULL && !dst->m_gpu_implementation_p) - { - e->redirect_callee (dst->m_bound_function); - if (dump_file) - fprintf (dump_file, - "Redirecting edge to HSA function: %s->%s\n", - e->caller->dump_name (), - e->callee->dump_name ()); - } - } - - e = e->next_callee; - } - } - - return 0; -} - -/* Iterate all HSA functions and stream out HSA function summary. */ - -static void -ipa_hsa_write_summary (void) -{ - struct bitpack_d bp; - struct cgraph_node *node; - struct output_block *ob; - unsigned int count = 0; - lto_symtab_encoder_iterator lsei; - lto_symtab_encoder_t encoder; - - if (!hsa_summaries) - return; - - ob = create_output_block (LTO_section_ipa_hsa); - encoder = ob->decl_state->symtab_node_encoder; - ob->symbol = NULL; - for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); - lsei_next_function_in_partition (&lsei)) - { - node = lsei_cgraph_node (lsei); - hsa_function_summary *s = hsa_summaries->get (node); - - if (s != NULL) - count++; - } - - streamer_write_uhwi (ob, count); - - /* Process all of the functions. 
*/ - for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); - lsei_next_function_in_partition (&lsei)) - { - node = lsei_cgraph_node (lsei); - hsa_function_summary *s = hsa_summaries->get (node); - - if (s != NULL) - { - encoder = ob->decl_state->symtab_node_encoder; - int node_ref = lto_symtab_encoder_encode (encoder, node); - streamer_write_uhwi (ob, node_ref); - - bp = bitpack_create (ob->main_stream); - bp_pack_value (&bp, s->m_kind, 2); - bp_pack_value (&bp, s->m_gpu_implementation_p, 1); - bp_pack_value (&bp, s->m_bound_function != NULL, 1); - streamer_write_bitpack (&bp); - if (s->m_bound_function) - stream_write_tree (ob, s->m_bound_function->decl, true); - } - } - - streamer_write_char_stream (ob->main_stream, 0); - produce_asm (ob, NULL); - destroy_output_block (ob); -} - -/* Read section in file FILE_DATA of length LEN with data DATA. */ - -static void -ipa_hsa_read_section (struct lto_file_decl_data *file_data, const char *data, - size_t len) -{ - const struct lto_function_header *header - = (const struct lto_function_header *) data; - const int cfg_offset = sizeof (struct lto_function_header); - const int main_offset = cfg_offset + header->cfg_size; - const int string_offset = main_offset + header->main_size; - class data_in *data_in; - unsigned int i; - unsigned int count; - - lto_input_block ib_main ((const char *) data + main_offset, - header->main_size, file_data->mode_table); - - data_in - = lto_data_in_create (file_data, (const char *) data + string_offset, - header->string_size, vNULL); - count = streamer_read_uhwi (&ib_main); - - for (i = 0; i < count; i++) - { - unsigned int index; - struct cgraph_node *node; - lto_symtab_encoder_t encoder; - - index = streamer_read_uhwi (&ib_main); - encoder = file_data->symtab_node_encoder; - node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, - index)); - gcc_assert (node->definition); - hsa_function_summary *s = hsa_summaries->get_create (node); - - struct bitpack_d bp = 
streamer_read_bitpack (&ib_main); - s->m_kind = (hsa_function_kind) bp_unpack_value (&bp, 2); - s->m_gpu_implementation_p = bp_unpack_value (&bp, 1); - bool has_tree = bp_unpack_value (&bp, 1); - - if (has_tree) - { - tree decl = stream_read_tree (&ib_main, data_in); - s->m_bound_function = cgraph_node::get_create (decl); - } - } - lto_free_section_data (file_data, LTO_section_ipa_hsa, NULL, data, - len); - lto_data_in_delete (data_in); -} - -/* Load streamed HSA functions summary and assign the summary to a function. */ - -static void -ipa_hsa_read_summary (void) -{ - struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); - struct lto_file_decl_data *file_data; - unsigned int j = 0; - - if (hsa_summaries == NULL) - hsa_summaries = new hsa_summary_t (symtab); - - while ((file_data = file_data_vec[j++])) - { - size_t len; - const char *data - = lto_get_summary_section_data (file_data, LTO_section_ipa_hsa, &len); - if (data) - ipa_hsa_read_section (file_data, data, len); - } -} - -const pass_data pass_data_ipa_hsa = -{ - IPA_PASS, /* type */ - "hsa", /* name */ - OPTGROUP_OMP, /* optinfo_flags */ - TV_IPA_HSA, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - TODO_dump_symtab, /* todo_flags_finish */ -}; - -class pass_ipa_hsa : public ipa_opt_pass_d -{ -public: - pass_ipa_hsa (gcc::context *ctxt) - : ipa_opt_pass_d (pass_data_ipa_hsa, ctxt, - NULL, /* generate_summary */ - ipa_hsa_write_summary, /* write_summary */ - ipa_hsa_read_summary, /* read_summary */ - ipa_hsa_write_summary, /* write_optimization_summary */ - ipa_hsa_read_summary, /* read_optimization_summary */ - NULL, /* stmt_fixup */ - 0, /* function_transform_todo_flags_start */ - NULL, /* function_transform */ - NULL) /* variable_transform */ - {} - - /* opt_pass methods: */ - virtual bool gate (function *); - - virtual unsigned int execute (function *) { return process_hsa_functions (); } - -}; // class 
pass_ipa_reference - -bool -pass_ipa_hsa::gate (function *) -{ - return hsa_gen_requested_p (); -} - -} // anon namespace - -ipa_opt_pass_d * -make_pass_ipa_hsa (gcc::context *ctxt) -{ - return new pass_ipa_hsa (ctxt); -} diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index 48cf484..8a38fa2 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -53,7 +53,6 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = "icf", "offload_table", "mode_table", - "hsa", "lto", "ipa_sra", "odr_types", diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index b17137f..0129f00 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -224,7 +224,6 @@ enum lto_section_type LTO_section_ipa_icf, LTO_section_offload_table, LTO_section_mode_table, - LTO_section_ipa_hsa, LTO_section_lto, LTO_section_ipa_sra, LTO_section_odr_types, diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c index e3b5cb2..82cfa6b 100644 --- a/gcc/lto-wrapper.c +++ b/gcc/lto-wrapper.c @@ -976,7 +976,6 @@ compile_images_for_offload_targets (unsigned in_argc, char *in_argv[], return; unsigned num_targets = parse_env_var (target_names, &names, NULL); - int next_name_entry = 0; const char *compiler_path = getenv ("COMPILER_PATH"); if (!compiler_path) goto out; @@ -986,19 +985,13 @@ compile_images_for_offload_targets (unsigned in_argc, char *in_argv[], offload_names = XCNEWVEC (char *, num_targets + 1); for (unsigned i = 0; i < num_targets; i++) { - /* HSA does not use LTO-like streaming and a different compiler, skip - it. 
*/ - if (strcmp (names[i], "hsa") == 0) - continue; - - offload_names[next_name_entry] + offload_names[i] = compile_offload_image (names[i], compiler_path, in_argc, in_argv, compiler_opts, compiler_opt_count, linker_opts, linker_opt_count); - if (!offload_names[next_name_entry]) + if (!offload_names[i]) fatal_error (input_location, "problem with building target image for %s", names[i]); - next_name_entry++; } out: diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c index ee354b7..efffac6 100644 --- a/gcc/omp-expand.c +++ b/gcc/omp-expand.c @@ -56,7 +56,6 @@ along with GCC; see the file COPYING3. If not see #include "symbol-summary.h" #include "gomp-constants.h" #include "gimple-pretty-print.h" -#include "hsa-common.h" #include "stringpool.h" #include "attribs.h" @@ -484,37 +483,6 @@ gimple_build_cond_empty (tree cond) return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE); } -/* Return true if a parallel REGION is within a declare target function or - within a target region and is not a part of a gridified target. */ - -static bool -parallel_needs_hsa_kernel_p (struct omp_region *region) -{ - bool indirect = false; - for (region = region->outer; region; region = region->outer) - { - if (region->type == GIMPLE_OMP_PARALLEL) - indirect = true; - else if (region->type == GIMPLE_OMP_TARGET) - { - gomp_target *tgt_stmt - = as_a <gomp_target *> (last_stmt (region->entry)); - - if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - return indirect; - else - return true; - } - } - - if (lookup_attribute ("omp declare target", - DECL_ATTRIBUTES (current_function_decl))) - return true; - - return false; -} - /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function. 
Add CHILD_FNDECL to decl chain of the supercontext of the block ENTRY_BLOCK - this is the block which originally contained the @@ -772,13 +740,6 @@ expand_parallel_call (struct omp_region *region, basic_block bb, } force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false, GSI_CONTINUE_LINKING); - - if (hsa_gen_requested_p () - && parallel_needs_hsa_kernel_p (region)) - { - cgraph_node *child_cnode = cgraph_node::get (child_fndecl); - hsa_register_kernel (child_cnode); - } } /* Build the function call to GOMP_task to actually @@ -8528,113 +8489,6 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry, loop->in_oacc_kernels_region = true; } -/* Types used to pass grid and wortkgroup sizes to kernel invocation. */ - -struct GTY(()) grid_launch_attributes_trees -{ - tree kernel_dim_array_type; - tree kernel_lattrs_dimnum_decl; - tree kernel_lattrs_grid_decl; - tree kernel_lattrs_group_decl; - tree kernel_launch_attributes_type; -}; - -static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees; - -/* Create types used to pass kernel launch attributes to target. 
*/ - -static void -grid_create_kernel_launch_attr_types (void) -{ - if (grid_attr_trees) - return; - grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> (); - - tree dim_arr_index_type - = build_index_type (build_int_cst (integer_type_node, 2)); - grid_attr_trees->kernel_dim_array_type - = build_array_type (uint32_type_node, dim_arr_index_type); - - grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE); - grid_attr_trees->kernel_lattrs_dimnum_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"), - uint32_type_node); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE; - - grid_attr_trees->kernel_lattrs_grid_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl) - = grid_attr_trees->kernel_lattrs_dimnum_decl; - grid_attr_trees->kernel_lattrs_group_decl - = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"), - grid_attr_trees->kernel_dim_array_type); - DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl) - = grid_attr_trees->kernel_lattrs_grid_decl; - finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type, - "__gomp_kernel_launch_attributes", - grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE); -} - -/* Insert before the current statement in GSI a store of VALUE to INDEX of - array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be - of type uint32_type_node. 
*/ - -static void -grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var, - tree fld_decl, int index, tree value) -{ - tree ref = build4 (ARRAY_REF, uint32_type_node, - build3 (COMPONENT_REF, - grid_attr_trees->kernel_dim_array_type, - range_var, fld_decl, NULL_TREE), - build_int_cst (integer_type_node, index), - NULL_TREE, NULL_TREE); - gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT); -} - -/* Return a tree representation of a pointer to a structure with grid and - work-group size information. Statements filling that information will be - inserted before GSI, TGT_STMT is the target statement which has the - necessary information in it. */ - -static tree -grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi, - gomp_target *tgt_stmt) -{ - grid_create_kernel_launch_attr_types (); - tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type, - "__kernel_launch_attrs"); - - unsigned max_dim = 0; - for (tree clause = gimple_omp_target_clauses (tgt_stmt); - clause; - clause = OMP_CLAUSE_CHAIN (clause)) - { - if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_) - continue; - - unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause); - max_dim = MAX (dim, max_dim); - - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_grid_decl, - dim, OMP_CLAUSE__GRIDDIM__SIZE (clause)); - grid_insert_store_range_dim (gsi, lattrs, - grid_attr_trees->kernel_lattrs_group_decl, - dim, OMP_CLAUSE__GRIDDIM__GROUP (clause)); - } - - tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs, - grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE); - gcc_checking_assert (max_dim <= 2); - tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1); - gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions), - GSI_SAME_STMT); - TREE_ADDRESSABLE (lattrs) = 1; - return build_fold_addr_expr (lattrs); -} - /* Build target argument identifier from the DEVICE identifier, value identifier ID and 
whether the element also has a SUBSEQUENT_PARAM. */ @@ -8725,16 +8579,6 @@ get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt) GOMP_TARGET_ARG_THREAD_LIMIT, t, &args); - /* Add HSA-specific grid sizes, if available. */ - if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)) - { - int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES; - t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id); - args.quick_push (t); - args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt)); - } - /* Produce more, perhaps device specific, arguments here. */ tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node, @@ -9351,302 +9195,6 @@ expand_omp_target (struct omp_region *region) } } -/* Expand KFOR loop as a HSA grifidied kernel, i.e. as a body only with - iteration variable derived from the thread number. INTRA_GROUP means this - is an expansion of a loop iterating over work-items within a separate - iteration over groups. */ - -static void -grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group) -{ - gimple_stmt_iterator gsi; - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_checking_assert (gimple_omp_for_kind (for_stmt) - == GF_OMP_FOR_KIND_GRID_LOOP); - size_t collapse = gimple_omp_for_collapse (for_stmt); - struct omp_for_data_loop *loops - = XALLOCAVEC (struct omp_for_data_loop, - gimple_omp_for_collapse (for_stmt)); - struct omp_for_data fd; - - remove_edge (BRANCH_EDGE (kfor->entry)); - basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest; - - gcc_assert (kfor->cont); - omp_extract_for_data (for_stmt, &fd, loops); - - gsi = gsi_start_bb (body_bb); - - for (size_t dim = 0; dim < collapse; dim++) - { - tree type, itype; - itype = type = TREE_TYPE (fd.loops[dim].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - - tree n1 = fd.loops[dim].n1; - tree step = fd.loops[dim].step; - n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1), - 
true, NULL_TREE, true, GSI_SAME_STMT); - step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step), - true, NULL_TREE, true, GSI_SAME_STMT); - tree threadid; - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - gcc_checking_assert (!intra_group); - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKGROUPID), 1, - build_int_cstu (unsigned_type_node, dim)); - } - else if (intra_group) - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMID), 1, - build_int_cstu (unsigned_type_node, dim)); - else - threadid = build_call_expr (builtin_decl_explicit - (BUILT_IN_HSA_WORKITEMABSID), 1, - build_int_cstu (unsigned_type_node, dim)); - threadid = fold_convert (itype, threadid); - threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE, - true, GSI_SAME_STMT); - - tree startvar = fd.loops[dim].v; - tree t = fold_build2 (MULT_EXPR, itype, threadid, step); - if (POINTER_TYPE_P (type)) - t = fold_build_pointer_plus (n1, t); - else - t = fold_build2 (PLUS_EXPR, type, t, n1); - t = fold_convert (type, t); - t = force_gimple_operand_gsi (&gsi, t, - DECL_P (startvar) - && TREE_ADDRESSABLE (startvar), - NULL_TREE, true, GSI_SAME_STMT); - gassign *assign_stmt = gimple_build_assign (startvar, t); - gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT); - } - /* Remove the omp for statement. */ - gsi = gsi_last_nondebug_bb (kfor->entry); - gsi_remove (&gsi, true); - - /* Remove the GIMPLE_OMP_CONTINUE statement. */ - gsi = gsi_last_nondebug_bb (kfor->cont); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE); - gsi_remove (&gsi, true); - - /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */ - gsi = gsi_last_nondebug_bb (kfor->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - if (intra_group) - gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Fixup the much simpler CFG. 
*/ - remove_edge (find_edge (kfor->cont, body_bb)); - - if (kfor->cont != body_bb) - set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb); - set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont); -} - -/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap - argument_decls. */ - -struct grid_arg_decl_map -{ - tree old_arg; - tree new_arg; -}; - -/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones - pertaining to kernel function. */ - -static tree -grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data) -{ - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info; - tree t = *tp; - - if (t == adm->old_arg) - *tp = adm->new_arg; - *walk_subtrees = !TYPE_P (t) && !DECL_P (t); - return NULL_TREE; -} - -/* If TARGET region contains a kernel body for loop, remove its region from the - TARGET and expand it in HSA gridified kernel fashion. */ - -static void -grid_expand_target_grid_body (struct omp_region *target) -{ - if (!hsa_gen_requested_p ()) - return; - - gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry)); - struct omp_region **pp; - - for (pp = &target->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_GRID_BODY) - break; - - struct omp_region *gpukernel = *pp; - - tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt); - if (!gpukernel) - { - /* HSA cannot handle OACC stuff. 
*/ - if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION) - return; - gcc_checking_assert (orig_child_fndecl); - gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - cgraph_node *n = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (n); - return; - } - - gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt), - OMP_CLAUSE__GRIDDIM_)); - tree inside_block - = gimple_block (first_stmt (single_succ (gpukernel->entry))); - *pp = gpukernel->next; - for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next) - if ((*pp)->type == GIMPLE_OMP_FOR) - break; - - struct omp_region *kfor = *pp; - gcc_assert (kfor); - gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry)); - gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP); - *pp = kfor->next; - if (kfor->inner) - { - if (gimple_omp_for_grid_group_iter (for_stmt)) - { - struct omp_region **next_pp; - for (pp = &kfor->inner; *pp; pp = next_pp) - { - next_pp = &(*pp)->next; - if ((*pp)->type != GIMPLE_OMP_FOR) - continue; - gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry)); - gcc_assert (gimple_omp_for_kind (inner) - == GF_OMP_FOR_KIND_GRID_LOOP); - grid_expand_omp_for_loop (*pp, true); - *pp = (*pp)->next; - next_pp = pp; - } - } - expand_omp (kfor->inner); - } - if (gpukernel->inner) - expand_omp (gpukernel->inner); - - tree kern_fndecl = copy_node (orig_child_fndecl); - DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl, - "kernel"); - SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl)); - tree tgtblock = gimple_block (tgt_stmt); - tree fniniblock = make_node (BLOCK); - BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock); - BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock); - BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock); - BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl; - DECL_INITIAL (kern_fndecl) = fniniblock; - 
push_struct_function (kern_fndecl); - cfun->function_end_locus = gimple_location (tgt_stmt); - init_tree_ssa (cfun); - pop_cfun (); - - tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl); - gcc_assert (!DECL_CHAIN (old_parm_decl)); - tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl)); - DECL_CONTEXT (new_parm_decl) = kern_fndecl; - DECL_ARGUMENTS (kern_fndecl) = new_parm_decl; - gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl)))); - DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl)); - DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl; - struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl); - kern_cfun->curr_properties = cfun->curr_properties; - - grid_expand_omp_for_loop (kfor, false); - - /* Remove the omp for statement. */ - gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry); - gsi_remove (&gsi, true); - /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real - return. */ - gsi = gsi_last_nondebug_bb (gpukernel->exit); - gcc_assert (!gsi_end_p (gsi) - && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); - gimple *ret_stmt = gimple_build_return (NULL); - gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT); - gsi_remove (&gsi, true); - - /* Statements in the first BB in the target construct have been produced by - target lowering and must be copied inside the GPUKERNEL, with the two - exceptions of the first OMP statement and the OMP_DATA assignment - statement. */ - gsi = gsi_start_bb (single_succ (gpukernel->entry)); - tree data_arg = gimple_omp_target_data_arg (tgt_stmt); - tree sender = data_arg ? 
TREE_VEC_ELT (data_arg, 0) : NULL; - for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry)); - !gsi_end_p (tsi); gsi_next (&tsi)) - { - gimple *stmt = gsi_stmt (tsi); - if (is_gimple_omp (stmt)) - break; - if (sender - && is_gimple_assign (stmt) - && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR - && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender) - continue; - gimple *copy = gimple_copy (stmt); - gsi_insert_before (&gsi, copy, GSI_SAME_STMT); - gimple_set_block (copy, fniniblock); - } - - move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry), - gpukernel->exit, inside_block); - - cgraph_node *kcn = cgraph_node::get_create (kern_fndecl); - kcn->mark_force_output (); - cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl); - - hsa_register_kernel (kcn, orig_child); - - cgraph_node::add_new_function (kern_fndecl, true); - push_cfun (kern_cfun); - cgraph_edge::rebuild_edges (); - - /* Re-map any mention of the PARM_DECL of the original function to the - PARM_DECL of the new one. - - TODO: It would be great if lowering produced references into the GPU - kernel decl straight away and we did not have to do this. */ - struct grid_arg_decl_map adm; - adm.old_arg = old_parm_decl; - adm.new_arg = new_parm_decl; - basic_block bb; - FOR_EACH_BB_FN (bb, kern_cfun) - { - for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = &adm; - walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi); - } - } - pop_cfun (); - - return; -} - /* Expand the parallel region tree rooted at REGION. Expansion proceeds in depth-first order. Innermost regions are expanded first. This way, parallel regions that require a new function to @@ -9666,8 +9214,6 @@ expand_omp (struct omp_region *region) region. 
*/ if (region->type == GIMPLE_OMP_PARALLEL) determine_parallel_type (region); - else if (region->type == GIMPLE_OMP_TARGET) - grid_expand_target_grid_body (region); if (region->type == GIMPLE_OMP_FOR && gimple_omp_for_combined_p (last_stmt (region->entry))) @@ -10039,7 +9585,6 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region, case GIMPLE_OMP_TASKGROUP: case GIMPLE_OMP_CRITICAL: case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_GRID_BODY: cur_region = new_omp_region (bb, code, cur_region); fallthru = true; break; @@ -10181,5 +9726,3 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region, return fallthru; } - -#include "gt-omp-expand.h" diff --git a/gcc/omp-general.c b/gcc/omp-general.c index c539038..6e6d3e1 100644 --- a/gcc/omp-general.c +++ b/gcc/omp-general.c @@ -39,7 +39,6 @@ along with GCC; see the file COPYING3. If not see #include "cgraph.h" #include "alloc-pool.h" #include "symbol-summary.h" -#include "hsa-common.h" #include "tree-pass.h" #include "omp-device-properties.h" #include "tree-iterator.h" @@ -1052,14 +1051,12 @@ omp_offload_device_kind_arch_isa (const char *props, const char *prop) static bool omp_maybe_offloaded (void) { - if (!hsa_gen_requested_p ()) - { - if (!ENABLE_OFFLOADING) - return false; - const char *names = getenv ("OFFLOAD_TARGET_NAMES"); - if (names == NULL || *names == '\0') - return false; - } + if (!ENABLE_OFFLOADING) + return false; + const char *names = getenv ("OFFLOAD_TARGET_NAMES"); + if (names == NULL || *names == '\0') + return false; + if (symtab->state == PARSING) /* Maybe. */ return true; @@ -1234,12 +1231,6 @@ omp_context_selector_matches (tree ctx) also offloading values. */ if (!omp_maybe_offloaded ()) return 0; - if (strcmp (arch, "hsa") == 0 - && hsa_gen_requested_p ()) - { - ret = -1; - continue; - } if (ENABLE_OFFLOADING) { const char *arches = omp_offload_device_arch; @@ -1360,12 +1351,6 @@ omp_context_selector_matches (tree ctx) also offloading values. 
*/ if (!omp_maybe_offloaded ()) return 0; - if (strcmp (prop, "gpu") == 0 - && hsa_gen_requested_p ()) - { - ret = -1; - continue; - } if (ENABLE_OFFLOADING) { const char *kinds = omp_offload_device_kind; diff --git a/gcc/omp-grid.c b/gcc/omp-grid.c deleted file mode 100644 index ba635fd..0000000 --- a/gcc/omp-grid.c +++ /dev/null @@ -1,1419 +0,0 @@ -/* Lowering and expansion of OpenMP directives for HSA GPU agents. - - Copyright (C) 2013-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. - -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "backend.h" -#include "tree.h" -#include "gimple.h" -#include "tree-pass.h" -#include "ssa.h" -#include "cgraph.h" -#include "pretty-print.h" -#include "fold-const.h" -#include "gimplify.h" -#include "gimple-iterator.h" -#include "gimple-walk.h" -#include "tree-inline.h" -#include "langhooks.h" -#include "omp-general.h" -#include "omp-low.h" -#include "omp-grid.h" -#include "gimple-pretty-print.h" - -/* Return the lastprivate predicate for a given gridified loop described by - FD). */ - -tree -omp_grid_lastprivate_predicate (struct omp_for_data *fd) -{ - /* When dealing with a gridified loop, we need to check up to three collapsed - iteration variables but they are not actually captured in this fd. - Fortunately, we can easily rely on HSA builtins to get this - information. 
*/ - - tree id, size; - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP - && gimple_omp_for_grid_intra_group (fd->for_stmt)) - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID); - size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE); - } - else - { - id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID); - size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE); - } - tree cond = NULL; - for (int dim = 0; dim < fd->collapse; dim++) - { - tree dim_tree = build_int_cstu (unsigned_type_node, dim); - tree u1 = build_int_cstu (unsigned_type_node, 1); - tree c2 - = build2 (EQ_EXPR, boolean_type_node, - build2 (PLUS_EXPR, unsigned_type_node, - build_call_expr (id, 1, dim_tree), u1), - build_call_expr (size, 1, dim_tree)); - if (cond) - cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2); - else - cond = c2; - } - return cond; -} - -/* Structure describing the basic properties of the loop we ara analyzing - whether it can be gridified and when it is gridified. */ - -class grid_prop -{ -public: - /* True when we are doing tiling gridification, i.e. when there is a distinct - distribute loop over groups and a loop construct over work-items. False - when distribute and parallel for loops form a combined construct. */ - bool tiling; - /* Location of the target construct for optimization information - messages. */ - dump_user_location_t target_loc; - /* The collapse clause of the involved loops. Collapse value of all of them - must be the same for gridification to take place. */ - size_t collapse; - /* Group sizes, if requested by the user or NULL if not requested. */ - tree group_sizes[3]; -}; - -#define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \ - "gridified HSA kernel because " - -/* Return true if STMT is an assignment of a register-type into a local - VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to - any of the trees specifying group sizes there. 
*/ - -static bool -grid_safe_assignment_p (gimple *stmt, grid_prop *grid) -{ - gassign *assign = dyn_cast <gassign *> (stmt); - if (!assign) - return false; - if (gimple_clobber_p (assign)) - return true; - tree lhs = gimple_assign_lhs (assign); - if (!VAR_P (lhs) - || !is_gimple_reg_type (TREE_TYPE (lhs)) - || is_global_var (lhs)) - return false; - if (grid) - for (unsigned i = 0; i < grid->collapse; i++) - if (lhs == grid->group_sizes[i]) - return false; - return true; -} - -/* Return true if all statements in SEQ are assignments to local register-type - variables that do not hold group size information. */ - -static bool -grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid) -{ - if (!seq) - return true; - - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - if (!grid_safe_assignment_p (gsi_stmt (gsi), grid)) - return false; - return true; -} - -/* Scan statements in SEQ and call itself recursively on any bind. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. If during whole search only assignments to - register-type local variables (that do not overwrite group size information) - and one single OMP statement is encountered, return true, otherwise return - false. RET is where we store any OMP statement encountered. 
*/ - -static bool -grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid, - const char *name, gimple **ret) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid)) - continue; - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - gimple_seq bind_body = gimple_bind_body (bind); - if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name, - ret)) - return false; - } - else if (is_gimple_omp (stmt)) - { - if (*ret) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct " - "contains multiple OpenMP constructs\n", - name); - dump_printf_loc (MSG_NOTE, *ret, - "The first OpenMP construct within " - "a parallel\n"); - dump_printf_loc (MSG_NOTE, stmt, - "The second OpenMP construct within " - "a parallel\n"); - } - return false; - } - *ret = stmt; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct contains " - "a complex statement\n", name); - dump_printf_loc (MSG_NOTE, stmt, - "This statement cannot be analyzed for " - "gridification\n"); - } - return false; - } - } - return true; -} - -/* Scan statements in SEQ and make sure that it and any binds in it contain - only assignments to local register-type variables (that do not overwrite - group size information) and one OMP construct. If so, return that - construct, otherwise return NULL. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. If - dumping is enabled and function fails, use NAME to dump a note with the - reason for failure. 
*/ - -static gimple * -grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid, - const char *name) -{ - if (!seq) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct has empty body\n", - name); - return NULL; - } - - gimple *ret = NULL; - if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret)) - { - if (!ret && dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "%s construct does not contain" - " any other OpenMP construct\n", name); - return ret; - } - else - return NULL; -} - -/* Walker function looking for statements there is no point gridifying (and for - noreturn function calls which we cannot do). Return non-NULL if such a - function is found. */ - -static tree -grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - switch (gimple_code (stmt)) - { - case GIMPLE_CALL: - if (gimple_call_noreturn_p (as_a <gcall *> (stmt))) - { - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - } - break; - - /* We may reduce the following list if we find a way to implement the - clauses, but now there is no point trying further. */ - case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_TASKGROUP: - case GIMPLE_OMP_TASK: - case GIMPLE_OMP_SECTION: - case GIMPLE_OMP_SECTIONS: - case GIMPLE_OMP_SECTIONS_SWITCH: - case GIMPLE_OMP_TARGET: - case GIMPLE_OMP_ORDERED: - *handled_ops_p = true; - wi->info = stmt; - return error_mark_node; - default: - break; - } - return NULL; -} - -/* Examine clauses of omp parallel statement PAR and if any prevents - gridification, issue a missed-optimization diagnostics and return false, - otherwise return true. GRID describes hitherto discovered properties of the - loop that is evaluated for possible gridification. 
*/ - -static bool -grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc) -{ - tree clauses = gimple_omp_parallel_clauses (par); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_THREADS: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "because there is " - "a num_threads clause of the parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, par, - "Parallel construct has a num_threads clause\n"); - } - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction clause " - "is present\n "); - dump_printf_loc (MSG_NOTE, par, - "Parallel construct has a reduction clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - return true; -} - -/* Examine clauses and the body of omp loop statement GFOR and if something - prevents gridification, issue a missed-optimization diagnostics and return - false, otherwise return true. GRID describes hitherto discovered properties - of the loop that is evaluated for possible gridification. 
*/ - -static bool -grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid) -{ - if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor), - grid)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "loop bounds computation contains a complex " - "statement\n"); - dump_printf_loc (MSG_NOTE, gfor, - "Loop construct cannot be analyzed for " - "gridification\n"); - } - return false; - } - - tree clauses = gimple_omp_for_clauses (gfor); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_SCHEDULE: - if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop " - "has a non-automatic schedule clause\n"); - dump_printf_loc (MSG_NOTE, gfor, - "Loop construct has a non automatic " - "schedule clause\n"); - } - return false; - } - break; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - dump_printf_loc (MSG_NOTE, gfor, - "Loop construct has a reduction schedule " - "clause\n"); - } - return false; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - if (walk_gimple_seq (gimple_omp_body (gfor), - grid_find_ungridifiable_statement, - NULL, &wi)) - { - gimple *bad = (gimple *) wi.info; - if (dump_enabled_p ()) - { - if (is_gimple_call (bad)) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "call to a noreturn function\n"); - else - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the inner loop contains " - "statement %s which cannot be transformed\n", - 
gimple_code_name[(int) gimple_code (bad)]); - dump_printf_loc (MSG_NOTE, bad, - "This statement cannot be analyzed for " - "gridification\n"); - } - return false; - } - return true; -} - -/* Given distribute omp construct represented by DIST, which in the original - source forms a compound construct with a looping construct, return true if it - can be turned into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. */ - -static bool -grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid) -{ - dump_user_location_t tloc = grid->target_loc; - gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist), - grid, "distribute"); - gomp_parallel *par; - if (!stmt - || !(par = dyn_cast <gomp_parallel *> (stmt)) - || !grid_parallel_clauses_gridifiable (par, tloc)) - return false; - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid, - "parallel"); - gomp_for *gfor; - if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt))) - return false; - - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the inner loop is not " - "a simple for loop\n"); - return false; - } - gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse); - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - return true; -} - -/* Given an omp loop statement GFOR, return true if it can participate in - tiling gridification, i.e. in one where the distribute and parallel for - loops do not form a compound statement. GRID describes hitherto discovered - properties of the loop that is evaluated for possible gridification. 
*/ - -static bool -grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid) -{ - if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop is not " - "a simple for loop\n"); - dump_printf_loc (MSG_NOTE, gfor, - "This statement is not a simple for loop\n"); - } - return false; - } - - if (!grid_inner_loop_gridifiable_p (gfor, grid)) - return false; - - if (gimple_omp_for_collapse (gfor) != grid->collapse) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "an inner loop does not " - "have use the same collapse clause\n"); - dump_printf_loc (MSG_NOTE, gfor, - "Loop construct uses a different collapse clause\n"); - } - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - omp_extract_for_data (gfor, &fd, loops); - for (unsigned i = 0; i < grid->collapse; i++) - { - tree itype, type = TREE_TYPE (fd.loops[i].v); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - tree n1 = fold_convert (itype, fd.loops[i].n1); - tree n2 = fold_convert (itype, fd.loops[i].n2); - tree t = build_int_cst (itype, - (fd.loops[i].cond_code == LT_EXPR ? 
-1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step); - - if (!operand_equal_p (grid->group_sizes[i], t, 0)) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute and " - "an internal loop do not agree on tile size\n"); - dump_printf_loc (MSG_NOTE, gfor, - "Loop construct does not seem to loop over " - "a tile size\n"); - } - return false; - } - } - return true; -} - -/* Facing a call to FNDECL in the body of a distribute construct, return true - if we can handle it or false if it precludes gridification. */ - -static bool -grid_call_permissible_in_distribute_p (tree fndecl) -{ - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return true; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if (strstr (name, "omp_") != name) - return false; - - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - return true; - - return false; -} - -/* Facing a call satisfying grid_call_permissible_in_distribute_p in the body - of a distribute construct that is pointed at by GSI, modify it as necessary - for gridification. If the statement itself got removed, return true. 
*/ - -static bool -grid_handle_call_in_distribute (gimple_stmt_iterator *gsi) -{ - gimple *stmt = gsi_stmt (*gsi); - tree fndecl = gimple_call_fndecl (stmt); - gcc_checking_assert (stmt); - if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl)) - return false; - - const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - if ((strcmp (name, "omp_get_thread_num") == 0) - || (strcmp (name, "omp_get_level") == 0) - || (strcmp (name, "omp_get_active_level") == 0) - || (strcmp (name, "omp_in_parallel") == 0)) - { - tree lhs = gimple_call_lhs (stmt); - if (lhs) - { - gassign *assign - = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); - gsi_insert_before (gsi, assign, GSI_SAME_STMT); - } - gsi_remove (gsi, true); - return true; - } - - /* The rest of the omp functions can stay as they are, HSA back-end will - handle them correctly. */ - gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0) - || (strcmp (name, "omp_get_num_teams") == 0) - || (strcmp (name, "omp_get_team_num") == 0)); - return false; -} - -/* Given a sequence of statements within a distribute omp construct or a - parallel construct, which in the original source does not form a compound - construct with a looping construct, return true if it does not prevent us - from turning it into a gridified HSA kernel. Otherwise return false. GRID - describes hitherto discovered properties of the loop that is evaluated for - possible gridification. IN_PARALLEL must be true if seq is within a - parallel construct and flase if it is only within a distribute - construct. 
*/ - -static bool -grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid, - bool in_parallel) -{ - gimple_stmt_iterator gsi; - for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - - if (grid_safe_assignment_p (stmt, grid) - || gimple_code (stmt) == GIMPLE_GOTO - || gimple_code (stmt) == GIMPLE_LABEL - || gimple_code (stmt) == GIMPLE_COND) - continue; - else if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind), - grid, in_parallel)) - return false; - continue; - } - else if (gtry *try_stmt = dyn_cast <gtry *> (stmt)) - { - if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a try..catch region\n"); - dump_printf_loc (MSG_NOTE, try_stmt, - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt), - grid, in_parallel)) - return false; - if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt), - grid, in_parallel)) - return false; - continue; - } - else if (is_gimple_call (stmt)) - { - tree fndecl = gimple_call_fndecl (stmt); - if (fndecl && grid_call_permissible_in_distribute_p (fndecl)) - continue; - - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a call\n"); - dump_printf_loc (MSG_NOTE, stmt, - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt)) - { - if (in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a parallel " - "construct contains another parallel " - "construct\n"); - 
dump_printf_loc (MSG_NOTE, stmt, - "This parallel construct is nested in " - "another one\n"); - } - return false; - } - if (!grid_parallel_clauses_gridifiable (par, grid->target_loc) - || !grid_dist_follows_tiling_pattern (gimple_omp_body (par), - grid, true)) - return false; - } - else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt)) - { - if (!in_parallel) - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "a loop " - "construct is not nested within a parallel " - "construct\n"); - dump_printf_loc (MSG_NOTE, stmt, - "This loop construct is not nested in " - "a parallel construct\n"); - } - return false; - } - if (!grid_gfor_follows_tiling_pattern (gfor, grid)) - return false; - } - else - { - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc, - GRID_MISSED_MSG_PREFIX "the distribute " - "construct contains a complex statement\n"); - dump_printf_loc (MSG_NOTE, stmt, - "This statement cannot be analyzed for " - "tiled gridification\n"); - } - return false; - } - } - return true; -} - -/* If TARGET follows a pattern that can be turned into a gridified HSA kernel, - return true, otherwise return false. In the case of success, also fill in - GRID with information describing the kernel grid. 
*/ - -static bool -grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid) -{ - if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION) - return false; - - dump_user_location_t tloc = target; - grid->target_loc = tloc; - gimple *stmt - = grid_find_single_omp_among_assignments (gimple_omp_body (target), - grid, "target"); - if (!stmt) - return false; - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - tree group_size = NULL; - if (!teams) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "it does not have a sole " - "teams construct in it.\n"); - return false; - } - - tree clauses = gimple_omp_teams_clauses (teams); - while (clauses) - { - switch (OMP_CLAUSE_CODE (clauses)) - { - case OMP_CLAUSE_NUM_TEAMS: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct " - "contains a num_teams clause\n "); - return false; - - case OMP_CLAUSE_REDUCTION: - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "a reduction " - "clause is present\n "); - return false; - - case OMP_CLAUSE_THREAD_LIMIT: - if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0))) - group_size = OMP_CLAUSE_OPERAND (clauses, 0); - break; - - default: - break; - } - clauses = OMP_CLAUSE_CHAIN (clauses); - } - - stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid, - "teams"); - if (!stmt) - return false; - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - if (!dist) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams construct does not " - "have a single distribute construct in it.\n"); - return false; - } - - gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE); - - grid->collapse = gimple_omp_for_collapse (dist); - if (grid->collapse > 3) - { - if (dump_enabled_p ()) - dump_printf_loc 
(MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the distribute construct " - "contains collapse clause with parameter greater " - "than 3\n"); - return false; - } - - struct omp_for_data fd; - struct omp_for_data_loop *dist_loops - = (struct omp_for_data_loop *)alloca (grid->collapse - * sizeof (struct omp_for_data_loop)); - omp_extract_for_data (dist, &fd, dist_loops); - if (fd.chunk_size) - { - if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "the teams " - "thread limit is different from distribute " - "schedule chunk\n"); - return false; - } - group_size = fd.chunk_size; - } - if (group_size && grid->collapse > 1) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be " - "set using thread_limit or schedule clauses " - "when also using a collapse clause greater than 1\n"); - return false; - } - - if (gimple_omp_for_combined_p (dist)) - { - grid->tiling = false; - grid->group_sizes[0] = group_size; - for (unsigned i = 1; i < grid->collapse; i++) - grid->group_sizes[i] = NULL; - return grid_dist_follows_simple_pattern (dist, grid); - } - else - { - grid->tiling = true; - if (group_size) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc, - GRID_MISSED_MSG_PREFIX "group size cannot be set " - "using thread_limit or schedule clauses when " - "distribute and loop constructs do not form " - "one combined construct\n"); - return false; - } - for (unsigned i = 0; i < grid->collapse; i++) - { - if (fd.loops[i].cond_code == GT_EXPR) - grid->group_sizes[i] = fold_build1 (NEGATE_EXPR, - TREE_TYPE (fd.loops[i].step), - fd.loops[i].step); - else - grid->group_sizes[i] = fd.loops[i].step; - } - return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid, - false); - } -} - -/* Operand walker, used to remap pre-body declarations according to 
a hash map - provided in DATA. */ - -static tree -grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data) -{ - tree t = *tp; - - if (DECL_P (t) || TYPE_P (t)) - *walk_subtrees = 0; - else - *walk_subtrees = 1; - - if (VAR_P (t)) - { - struct walk_stmt_info *wi = (struct walk_stmt_info *) data; - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - tree *repl = declmap->get (t); - if (repl) - *tp = *repl; - } - return NULL_TREE; -} - -/* Identifiers of segments into which a particular variable should be places - when gridifying. */ - -enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP, - GRID_SEGMENT_GLOBAL}; - -/* Mark VAR so that it is eventually placed into SEGMENT. Place an artificial - builtin call into SEQ that will make sure the variable is always considered - address taken. */ - -static void -grid_mark_variable_segment (tree var, enum grid_var_segment segment) -{ - /* Making a non-addressable variables would require that we re-gimplify all - their uses. Fortunately, we do not have to do this because if they are - not addressable, it means they are not used in atomic or parallel - statements and so relaxed GPU consistency rules mean we can just keep them - private. 
*/ - if (!TREE_ADDRESSABLE (var)) - return; - - switch (segment) - { - case GRID_SEGMENT_GROUP: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - case GRID_SEGMENT_GLOBAL: - DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"), - NULL, DECL_ATTRIBUTES (var)); - break; - default: - gcc_unreachable (); - } - - if (!TREE_STATIC (var)) - { - TREE_STATIC (var) = 1; - const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var)); - SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix)); - varpool_node::finalize_decl (var); - } - -} - -/* Copy leading register-type assignments to local variables in SRC to just - before DST, Creating temporaries, adjusting mapping of operands in WI and - remapping operands as necessary. Add any new temporaries to TGT_BIND. - Return the first statement that does not conform to grid_safe_assignment_p - or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all - variables in traversed bind statements so that they are put into the - appropriate segment. 
*/ - -static gimple * -grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst, - gbind *tgt_bind, - enum grid_var_segment var_segment, - struct walk_stmt_info *wi) -{ - hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info; - gimple_stmt_iterator gsi; - for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi)) - { - gimple *stmt = gsi_stmt (gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - gimple *r = grid_copy_leading_local_assignments - (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi); - - if (var_segment != GRID_SEGMENT_PRIVATE) - for (tree var = gimple_bind_vars (bind); - var; - var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, var_segment); - if (r) - return r; - else - continue; - } - if (!grid_safe_assignment_p (stmt, NULL)) - return stmt; - tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt)); - tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL), - TREE_TYPE (lhs)); - DECL_CONTEXT (repl) = current_function_decl; - gimple_bind_append_vars (tgt_bind, repl); - - declmap->put (lhs, repl); - gassign *copy = as_a <gassign *> (gimple_copy (stmt)); - walk_gimple_op (copy, grid_remap_prebody_decls, wi); - gsi_insert_before (dst, copy, GSI_SAME_STMT); - } - return NULL; -} - -/* Statement walker function to make adjustments to statements within the - gridifed kernel copy. 
*/ - -static tree -grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *) -{ - *handled_ops_p = false; - gimple *stmt = gsi_stmt (*gsi); - if (gimple_code (stmt) == GIMPLE_OMP_FOR - && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD) - { - gomp_for *loop = as_a <gomp_for *> (stmt); - tree clauses = gimple_omp_for_clauses (loop); - tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN); - if (cl) - OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node; - else - { - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN); - OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node; - OMP_CLAUSE_CHAIN (c) = clauses; - gimple_omp_for_set_clauses (loop, c); - } - } - return NULL_TREE; -} - -/* Given a PARLOOP that is a normal for looping construct but also a part of a - combined construct with a simd loop, eliminate the simd loop. */ - -static void -grid_eliminate_combined_simd_part (gomp_for *parloop) -{ - struct walk_stmt_info wi; - - memset (&wi, 0, sizeof (wi)); - wi.val_only = true; - enum gf_mask msk = GF_OMP_FOR_KIND_SIMD; - wi.info = (void *) &msk; - walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi); - gimple *stmt = (gimple *) wi.info; - /* We expect that the SIMD id the only statement in the parallel loop. */ - gcc_assert (stmt - && gimple_code (stmt) == GIMPLE_OMP_FOR - && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD) - && gimple_omp_for_combined_into_p (stmt) - && !gimple_omp_for_combined_p (stmt)); - gomp_for *simd = as_a <gomp_for *> (stmt); - - /* Copy over the iteration properties because the body refers to the index in - the bottmom-most loop. 
*/ - unsigned i, collapse = gimple_omp_for_collapse (parloop); - gcc_checking_assert (collapse == gimple_omp_for_collapse (simd)); - for (i = 0; i < collapse; i++) - { - gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i)); - gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i)); - gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i)); - gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i)); - } - - tree *tgt= gimple_omp_for_clauses_ptr (parloop); - while (*tgt) - tgt = &OMP_CLAUSE_CHAIN (*tgt); - - /* Copy over all clauses, except for linear clauses, which are turned into - private clauses, and all other simd-specific clauses, which are - ignored. */ - tree *pc = gimple_omp_for_clauses_ptr (simd); - while (*pc) - { - tree c = *pc; - switch (OMP_CLAUSE_CODE (c)) - { - case OMP_CLAUSE_LINEAR: - { - tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE); - OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c); - OMP_CLAUSE_CHAIN (priv) = NULL; - *tgt = priv; - tgt = &OMP_CLAUSE_CHAIN (priv); - pc = &OMP_CLAUSE_CHAIN (c); - break; - } - - case OMP_CLAUSE_SAFELEN: - case OMP_CLAUSE_SIMDLEN: - case OMP_CLAUSE_ALIGNED: - pc = &OMP_CLAUSE_CHAIN (c); - break; - - default: - *pc = OMP_CLAUSE_CHAIN (c); - OMP_CLAUSE_CHAIN (c) = NULL; - *tgt = c; - tgt = &OMP_CLAUSE_CHAIN (c); - break; - } - } - - /* Finally, throw away the simd and mark the parallel loop as not - combined. */ - gimple_omp_set_body (parloop, gimple_omp_body (simd)); - gimple_omp_for_set_combined_p (parloop, false); -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. 
*/ - -static tree -grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi))) - { - *handled_ops_p = true; - gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_intra_group (loop, true); - if (gimple_omp_for_combined_p (loop)) - grid_eliminate_combined_simd_part (loop); - - struct walk_stmt_info body_wi; - memset (&body_wi, 0, sizeof (body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (loop), - grid_process_grid_body, NULL, &body_wi); - - gbind *bind = (gbind *) wi_in->info; - tree c; - for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c)) - if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE) - { - push_gimplify_context (); - tree ov = OMP_CLAUSE_DECL (c); - tree gv = copy_var_decl (ov, create_tmp_var_name (NULL), - TREE_TYPE (ov)); - - grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP); - DECL_CONTEXT (gv) = current_function_decl; - gimple_bind_append_vars (bind, gv); - tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov); - gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c)); - x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv); - gimple_seq l = NULL; - gimplify_and_add (x, &l); - gsi_insert_seq_after (gsi, l, GSI_SAME_STMT); - pop_gimplify_context (bind); - } - } - return NULL_TREE; -} - -/* Statement walker function marking all parallels as grid_phony and loops as - grid ones representing threads of a particular thread group. 
*/ - -static tree -grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *wi_in) -{ - *handled_ops_p = false; - wi_in->removed_stmt = false; - gimple *stmt = gsi_stmt (*gsi); - if (gbind *bind = dyn_cast <gbind *> (stmt)) - { - for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var)) - grid_mark_variable_segment (var, GRID_SEGMENT_GROUP); - } - else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt)) - { - *handled_ops_p = true; - gimple_omp_parallel_set_grid_phony (parallel, true); - - gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); - gimple_bind_set_body (new_bind, gimple_omp_body (parallel)); - gimple_seq s = NULL; - gimple_seq_add_stmt (&s, new_bind); - gimple_omp_set_body (parallel, s); - - struct walk_stmt_info wi_par; - memset (&wi_par, 0, sizeof (wi_par)); - wi_par.info = new_bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind), - grid_mark_tiling_loops, NULL, &wi_par); - } - else if (is_a <gcall *> (stmt)) - wi_in->removed_stmt = grid_handle_call_in_distribute (gsi); - return NULL_TREE; -} - -/* Given freshly copied top level kernel SEQ, identify the individual OMP - components, mark them as part of kernel, copy assignment leading to them - just before DST, remapping them using WI and adding new temporaries to - TGT_BIND, and return the loop that will be used for kernel dispatch. 
*/ - -static gomp_for * -grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq, - gimple_stmt_iterator *dst, - gbind *tgt_bind, struct walk_stmt_info *wi) -{ - gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind, - GRID_SEGMENT_GLOBAL, wi); - gomp_teams *teams = dyn_cast <gomp_teams *> (stmt); - gcc_assert (teams); - gimple_omp_teams_set_grid_phony (teams, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst, - tgt_bind, GRID_SEGMENT_GLOBAL, - wi); - gcc_checking_assert (stmt); - gomp_for *dist = dyn_cast <gomp_for *> (stmt); - gcc_assert (dist); - gimple_seq prebody = gimple_omp_for_pre_body (dist); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_GROUP, wi); - - if (grid->tiling) - { - gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP); - gimple_omp_for_set_grid_group_iter (dist, true); - - struct walk_stmt_info wi_tiled; - memset (&wi_tiled, 0, sizeof (wi_tiled)); - walk_gimple_seq_mod (gimple_omp_body_ptr (dist), - grid_mark_tiling_parallels_and_loops, NULL, - &wi_tiled); - return dist; - } - else - { - gimple_omp_for_set_grid_phony (dist, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst, - tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gcc_checking_assert (stmt); - gomp_parallel *parallel = as_a <gomp_parallel *> (stmt); - gimple_omp_parallel_set_grid_phony (parallel, true); - stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel), - dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - gomp_for *inner_loop = as_a <gomp_for *> (stmt); - gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP); - prebody = gimple_omp_for_pre_body (inner_loop); - if (prebody) - grid_copy_leading_local_assignments (prebody, dst, tgt_bind, - GRID_SEGMENT_PRIVATE, wi); - - if (gimple_omp_for_combined_p (inner_loop)) - grid_eliminate_combined_simd_part (inner_loop); - struct walk_stmt_info body_wi; - memset (&body_wi, 0, sizeof 
(body_wi)); - walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop), - grid_process_grid_body, NULL, &body_wi); - - return inner_loop; - } -} - -/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern, - create a GPU kernel for it. GSI must point to the same statement, TGT_BIND - is the bind into which temporaries inserted before TARGET should be - added. */ - -static void -grid_attempt_target_gridification (gomp_target *target, - gimple_stmt_iterator *gsi, - gbind *tgt_bind) -{ - /* removed group_size */ - grid_prop grid = {}; - if (!target || !grid_target_follows_gridifiable_pattern (target, &grid)) - return; - - location_t loc = gimple_location (target); - if (dump_enabled_p ()) - dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target, - "Target construct will be turned into a gridified HSA " - "kernel\n"); - - /* Copy target body to a GPUKERNEL construct: */ - gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals - (gimple_omp_body (target)); - - hash_map<tree, tree> *declmap = new hash_map<tree, tree>; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (struct walk_stmt_info)); - wi.info = declmap; - - /* Copy assignments in between OMP statements before target, mark OMP - statements within copy appropriately. 
*/ - gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi, - tgt_bind, &wi); - - gbind *old_bind - = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target))); - gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq)); - tree new_block = gimple_bind_block (new_bind); - tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind)); - BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block); - BLOCK_SUBBLOCKS (enc_block) = new_block; - BLOCK_SUPERCONTEXT (new_block) = enc_block; - gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq); - gimple_seq_add_stmt - (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))), - gpukernel); - - for (size_t i = 0; i < grid.collapse; i++) - walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL); - push_gimplify_context (); - for (size_t i = 0; i < grid.collapse; i++) - { - tree index_var = gimple_omp_for_index (inner_loop, i); - tree itype, type = TREE_TYPE (index_var); - if (POINTER_TYPE_P (type)) - itype = signed_type_for (type); - else - itype = type; - - enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i); - tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i)); - walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL); - tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i)); - walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL); - tree step - = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i)); - omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step); - n1 = fold_convert (itype, n1); - n2 = fold_convert (itype, n2); - - tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2); - - tree t = build_int_cst (itype, (cond_code == LT_EXPR ? 
-1 : 1)); - t = fold_build2 (PLUS_EXPR, itype, step, t); - t = fold_build2 (PLUS_EXPR, itype, t, n2); - t = fold_build2 (MINUS_EXPR, itype, t, n1); - if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR) - t = fold_build2 (TRUNC_DIV_EXPR, itype, - fold_build1 (NEGATE_EXPR, itype, t), - fold_build1 (NEGATE_EXPR, itype, step)); - else - t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); - t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype)); - if (grid.tiling) - { - if (cond_code == GT_EXPR) - step = fold_build1 (NEGATE_EXPR, itype, step); - t = fold_build2 (MULT_EXPR, itype, t, step); - } - - tree gs = fold_convert (uint32_type_node, t); - gimple_seq tmpseq = NULL; - gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - - tree ws; - if (grid.group_sizes[i]) - { - ws = fold_convert (uint32_type_node, grid.group_sizes[i]); - tmpseq = NULL; - gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue); - if (!gimple_seq_empty_p (tmpseq)) - gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT); - } - else - ws = build_zero_cst (uint32_type_node); - - tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_); - OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i; - OMP_CLAUSE__GRIDDIM__SIZE (c) = gs; - OMP_CLAUSE__GRIDDIM__GROUP (c) = ws; - OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target); - gimple_omp_target_set_clauses (target, c); - } - pop_gimplify_context (tgt_bind); - delete declmap; - return; -} - -/* Walker function doing all the work for create_target_kernels. 
*/ - -static tree -grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi, - bool *handled_ops_p, - struct walk_stmt_info *incoming) -{ - *handled_ops_p = false; - - gimple *stmt = gsi_stmt (*gsi); - gomp_target *target = dyn_cast <gomp_target *> (stmt); - if (target) - { - gbind *tgt_bind = (gbind *) incoming->info; - gcc_checking_assert (tgt_bind); - grid_attempt_target_gridification (target, gsi, tgt_bind); - return NULL_TREE; - } - gbind *bind = dyn_cast <gbind *> (stmt); - if (bind) - { - *handled_ops_p = true; - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - wi.info = bind; - walk_gimple_seq_mod (gimple_bind_body_ptr (bind), - grid_gridify_all_targets_stmt, NULL, &wi); - } - return NULL_TREE; -} - -/* Attempt to gridify all target constructs in BODY_P. All such targets will - have their bodies duplicated, with the new copy being put into a - gimple_omp_grid_body statement. All kernel-related construct within the - grid_body will be marked with phony flags or kernel kinds. Moreover, some - re-structuring is often needed, such as copying pre-bodies before the target - construct so that kernel grid sizes can be computed. */ - -void -omp_grid_gridify_all_targets (gimple_seq *body_p) -{ - struct walk_stmt_info wi; - memset (&wi, 0, sizeof (wi)); - walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi); -} diff --git a/gcc/omp-grid.h b/gcc/omp-grid.h deleted file mode 100644 index 38679f1..0000000 --- a/gcc/omp-grid.h +++ /dev/null @@ -1,27 +0,0 @@ -/* Lowering and expansion of OpenMP directives for HSA GPU agents. - - Copyright (C) 2013-2020 Free Software Foundation, Inc. - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later -version. 
- -GCC is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING3. If not see -<http://www.gnu.org/licenses/>. */ - -#ifndef GCC_OMP_GRID_H -#define GCC_OMP_GRID_H - -extern tree omp_grid_lastprivate_predicate (struct omp_for_data *fd); -extern void omp_grid_gridify_all_targets (gimple_seq *body_p); - -#endif /* GCC_OMP_GRID_H */ diff --git a/gcc/omp-low.c b/gcc/omp-low.c index da6c275..52c2cae 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -50,7 +50,6 @@ along with GCC; see the file COPYING3. If not see #include "splay-tree.h" #include "omp-general.h" #include "omp-low.h" -#include "omp-grid.h" #include "gimple-low.h" #include "alloc-pool.h" #include "symbol-summary.h" @@ -58,7 +57,6 @@ along with GCC; see the file COPYING3. If not see #include "context.h" #include "gomp-constants.h" #include "gimple-pretty-print.h" -#include "hsa-common.h" #include "stringpool.h" #include "attribs.h" @@ -681,15 +679,7 @@ build_outer_var_ref (tree var, omp_context *ctx, } } else if (outer) - { - if (gimple_code (outer->stmt) == GIMPLE_OMP_GRID_BODY) - { - outer = outer->outer; - gcc_assert (outer - && gimple_code (outer->stmt) != GIMPLE_OMP_GRID_BODY); - } - x = lookup_decl (var, outer); - } + x = lookup_decl (var, outer); else if (omp_is_reference (var)) /* This can happen with orphaned constructs. If var is reference, it is possible it is shared and as such valid. 
*/ @@ -1460,14 +1450,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) } break; - case OMP_CLAUSE__GRIDDIM_: - if (ctx->outer) - { - scan_omp_op (&OMP_CLAUSE__GRIDDIM__SIZE (c), ctx->outer); - scan_omp_op (&OMP_CLAUSE__GRIDDIM__GROUP (c), ctx->outer); - } - break; - case OMP_CLAUSE_ORDER: ctx->order_concurrent = true; break; @@ -1698,7 +1680,6 @@ scan_sharing_clauses (tree clauses, omp_context *ctx) case OMP_CLAUSE_AUTO: case OMP_CLAUSE_SEQ: case OMP_CLAUSE_TILE: - case OMP_CLAUSE__GRIDDIM_: case OMP_CLAUSE__SIMT_: case OMP_CLAUSE_IF_PRESENT: case OMP_CLAUSE_FINALIZE: @@ -2021,11 +2002,8 @@ scan_omp_parallel (gimple_stmt_iterator *gsi, omp_context *outer_ctx) DECL_NAMELESS (name) = 1; TYPE_NAME (ctx->record_type) = name; TYPE_ARTIFICIAL (ctx->record_type) = 1; - if (!gimple_omp_parallel_grid_phony (stmt)) - { - create_omp_child_function (ctx, false); - gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); - } + create_omp_child_function (ctx, false); + gimple_omp_parallel_set_child_fn (stmt, ctx->cb.dst_fn); scan_sharing_clauses (gimple_omp_parallel_clauses (stmt), ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); @@ -2801,11 +2779,6 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) { tree c; - if (ctx && gimple_code (ctx->stmt) == GIMPLE_OMP_GRID_BODY) - /* GRID_BODY is an artificial construct, nesting rules will be checked in - the original copy of its contents. */ - return true; - /* No nesting of non-OpenACC STMT (that is, an OpenMP one, or a GOMP builtin) inside an OpenACC CTX. 
*/ if (!(is_gimple_omp (stmt) @@ -2891,7 +2864,6 @@ check_omp_nesting_restrictions (gimple *stmt, omp_context *ctx) { if ((gimple_code (stmt) != GIMPLE_OMP_FOR || (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_DISTRIBUTE - && gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP && omp_find_clause (gimple_omp_for_clauses (stmt), OMP_CLAUSE_BIND) == NULL_TREE)) && gimple_code (stmt) != GIMPLE_OMP_PARALLEL) @@ -3783,7 +3755,6 @@ scan_omp_1_stmt (gimple_stmt_iterator *gsi, bool *handled_ops_p, case GIMPLE_OMP_MASTER: case GIMPLE_OMP_ORDERED: case GIMPLE_OMP_CRITICAL: - case GIMPLE_OMP_GRID_BODY: ctx = new_omp_context (stmt, ctx); scan_omp (gimple_omp_body_ptr (stmt), ctx); break; @@ -9518,65 +9489,59 @@ lower_omp_for_lastprivate (struct omp_for_data *fd, gimple_seq *body_p, cond_code = EQ_EXPR; } - if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP - || gimple_omp_for_grid_phony (fd->for_stmt)) - cond = omp_grid_lastprivate_predicate (fd); - else + tree n2 = fd->loop.n2; + if (fd->collapse > 1 + && TREE_CODE (n2) != INTEGER_CST + && gimple_omp_for_combined_into_p (fd->for_stmt)) { - tree n2 = fd->loop.n2; - if (fd->collapse > 1 - && TREE_CODE (n2) != INTEGER_CST - && gimple_omp_for_combined_into_p (fd->for_stmt)) + struct omp_context *taskreg_ctx = NULL; + if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR) { - struct omp_context *taskreg_ctx = NULL; - if (gimple_code (ctx->outer->stmt) == GIMPLE_OMP_FOR) + gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt); + if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR + || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE) { - gomp_for *gfor = as_a <gomp_for *> (ctx->outer->stmt); - if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_FOR - || gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_DISTRIBUTE) + if (gimple_omp_for_combined_into_p (gfor)) { - if (gimple_omp_for_combined_into_p (gfor)) - { - gcc_assert (ctx->outer->outer - && is_parallel_ctx (ctx->outer->outer)); - taskreg_ctx = 
ctx->outer->outer; - } - else - { - struct omp_for_data outer_fd; - omp_extract_for_data (gfor, &outer_fd, NULL); - n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); - } + gcc_assert (ctx->outer->outer + && is_parallel_ctx (ctx->outer->outer)); + taskreg_ctx = ctx->outer->outer; } - else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP) - taskreg_ctx = ctx->outer->outer; - } - else if (is_taskreg_ctx (ctx->outer)) - taskreg_ctx = ctx->outer; - if (taskreg_ctx) - { - int i; - tree taskreg_clauses - = gimple_omp_taskreg_clauses (taskreg_ctx->stmt); - tree innerc = omp_find_clause (taskreg_clauses, - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); - for (i = 0; i < fd->collapse; i++) + else { - innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), - OMP_CLAUSE__LOOPTEMP_); - gcc_assert (innerc); + struct omp_for_data outer_fd; + omp_extract_for_data (gfor, &outer_fd, NULL); + n2 = fold_convert (TREE_TYPE (n2), outer_fd.loop.n2); } + } + else if (gimple_omp_for_kind (gfor) == GF_OMP_FOR_KIND_TASKLOOP) + taskreg_ctx = ctx->outer->outer; + } + else if (is_taskreg_ctx (ctx->outer)) + taskreg_ctx = ctx->outer; + if (taskreg_ctx) + { + int i; + tree taskreg_clauses + = gimple_omp_taskreg_clauses (taskreg_ctx->stmt); + tree innerc = omp_find_clause (taskreg_clauses, + OMP_CLAUSE__LOOPTEMP_); + gcc_assert (innerc); + for (i = 0; i < fd->collapse; i++) + { innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_); - if (innerc) - n2 = fold_convert (TREE_TYPE (n2), - lookup_decl (OMP_CLAUSE_DECL (innerc), - taskreg_ctx)); + gcc_assert (innerc); } + innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), + OMP_CLAUSE__LOOPTEMP_); + if (innerc) + n2 = fold_convert (TREE_TYPE (n2), + lookup_decl (OMP_CLAUSE_DECL (innerc), + taskreg_ctx)); } - cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2); } + cond = build2 (cond_code, boolean_type_node, fd->loop.v, n2); clauses = gimple_omp_for_clauses (fd->for_stmt); stmts = NULL; @@ -10638,24 +10603,17 
@@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) ctx); } - bool phony_loop = (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_GRID_LOOP - && gimple_omp_for_grid_phony (stmt)); if ((ctx->scan_inclusive || ctx->scan_exclusive) && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_FOR) - { - gcc_assert (!phony_loop); - lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx); - } + lower_omp_for_scan (&body, &dlist, stmt, &fd, ctx); else { - if (!phony_loop) - gimple_seq_add_stmt (&body, stmt); + gimple_seq_add_stmt (&body, stmt); gimple_seq_add_seq (&body, gimple_omp_body (stmt)); } - if (!phony_loop) - gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, - fd.loop.v)); + gimple_seq_add_stmt (&body, gimple_build_omp_continue (fd.loop.v, + fd.loop.v)); /* After the loop, add exit clauses. */ lower_reduction_clauses (gimple_omp_for_clauses (stmt), &body, &clist, ctx); @@ -10684,19 +10642,16 @@ lower_omp_for (gimple_stmt_iterator *gsi_p, omp_context *ctx) body = maybe_catch_exception (body); - if (!phony_loop) - { - /* Region exit marker goes at the end of the loop body. */ - gimple *g = gimple_build_omp_return (fd.have_nowait); - gimple_seq_add_stmt (&body, g); + /* Region exit marker goes at the end of the loop body. */ + gimple *g = gimple_build_omp_return (fd.have_nowait); + gimple_seq_add_stmt (&body, g); - gimple_seq_add_seq (&body, tred_dlist); + gimple_seq_add_seq (&body, tred_dlist); - maybe_add_implicit_barrier_cancel (ctx, g, &body); + maybe_add_implicit_barrier_cancel (ctx, g, &body); - if (rclauses) - OMP_CLAUSE_DECL (rclauses) = rtmp; - } + if (rclauses) + OMP_CLAUSE_DECL (rclauses) = rtmp; /* Add OpenACC joining and reduction markers just after the loop. 
*/ if (oacc_tail) @@ -11279,14 +11234,6 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq par_olist = NULL; gimple_seq par_ilist = NULL; gimple_seq par_rlist = NULL; - bool phony_construct = gimple_code (stmt) == GIMPLE_OMP_PARALLEL - && gimple_omp_parallel_grid_phony (as_a <gomp_parallel *> (stmt)); - if (phony_construct && ctx->record_type) - { - gcc_checking_assert (!ctx->receiver_decl); - ctx->receiver_decl = create_tmp_var - (build_reference_type (ctx->record_type), ".omp_rec"); - } lower_rec_input_clauses (clauses, &par_ilist, &par_olist, ctx, NULL); lower_omp (&par_body, ctx); if (gimple_code (stmt) == GIMPLE_OMP_PARALLEL) @@ -11345,11 +11292,8 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) gimple_seq_add_stmt (&new_body, gimple_build_omp_continue (integer_zero_node, integer_zero_node)); - if (!phony_construct) - { - gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); - gimple_omp_set_body (stmt, new_body); - } + gimple_seq_add_stmt (&new_body, gimple_build_omp_return (false)); + gimple_omp_set_body (stmt, new_body); if (dep_bind && gimple_bind_block (par_bind) == NULL_TREE) bind = gimple_build_bind (NULL, NULL, make_node (BLOCK)); @@ -11357,10 +11301,7 @@ lower_omp_taskreg (gimple_stmt_iterator *gsi_p, omp_context *ctx) bind = gimple_build_bind (NULL, NULL, gimple_bind_block (par_bind)); gsi_replace (gsi_p, dep_bind ? 
dep_bind : bind, true); gimple_bind_add_seq (bind, ilist); - if (!phony_construct) - gimple_bind_add_stmt (bind, stmt); - else - gimple_bind_add_seq (bind, new_body); + gimple_bind_add_stmt (bind, stmt); gimple_bind_add_seq (bind, olist); pop_gimplify_context (NULL); @@ -12641,22 +12582,19 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) lower_omp (gimple_omp_body_ptr (teams_stmt), ctx); lower_reduction_clauses (gimple_omp_teams_clauses (teams_stmt), &olist, NULL, ctx); - if (!gimple_omp_teams_grid_phony (teams_stmt)) - { - gimple_seq_add_stmt (&bind_body, teams_stmt); - location_t loc = gimple_location (teams_stmt); - tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS); - gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit); - gimple_set_location (call, loc); - gimple_seq_add_stmt (&bind_body, call); - } + gimple_seq_add_stmt (&bind_body, teams_stmt); + + location_t loc = gimple_location (teams_stmt); + tree decl = builtin_decl_explicit (BUILT_IN_GOMP_TEAMS); + gimple *call = gimple_build_call (decl, 2, num_teams, thread_limit); + gimple_set_location (call, loc); + gimple_seq_add_stmt (&bind_body, call); gimple_seq_add_seq (&bind_body, gimple_omp_body (teams_stmt)); gimple_omp_set_body (teams_stmt, NULL); gimple_seq_add_seq (&bind_body, olist); gimple_seq_add_seq (&bind_body, dlist); - if (!gimple_omp_teams_grid_phony (teams_stmt)) - gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true)); + gimple_seq_add_stmt (&bind_body, gimple_build_omp_return (true)); gimple_bind_set_body (bind, bind_body); pop_gimplify_context (bind); @@ -12667,18 +12605,6 @@ lower_omp_teams (gimple_stmt_iterator *gsi_p, omp_context *ctx) TREE_USED (block) = 1; } -/* Expand code within an artificial GIMPLE_OMP_GRID_BODY OMP construct. 
*/ - -static void -lower_omp_grid_body (gimple_stmt_iterator *gsi_p, omp_context *ctx) -{ - gimple *stmt = gsi_stmt (*gsi_p); - lower_omp (gimple_omp_body_ptr (stmt), ctx); - gimple_seq_add_stmt (gimple_omp_body_ptr (stmt), - gimple_build_omp_return (false)); -} - - /* Callback for lower_omp_1. Return non-NULL if *tp needs to be regimplified. If DATA is non-NULL, lower_omp_1 is outside of OMP context, but with task_shared_vars set. */ @@ -12897,11 +12823,6 @@ lower_omp_1 (gimple_stmt_iterator *gsi_p, omp_context *ctx) else lower_omp_teams (gsi_p, ctx); break; - case GIMPLE_OMP_GRID_BODY: - ctx = maybe_lookup_ctx (stmt); - gcc_assert (ctx); - lower_omp_grid_body (gsi_p, ctx); - break; case GIMPLE_CALL: tree fndecl; call_stmt = as_a <gcall *> (stmt); @@ -13059,9 +12980,6 @@ execute_lower_omp (void) body = gimple_body (current_function_decl); - if (hsa_gen_requested_p ()) - omp_grid_gridify_all_targets (&body); - scan_omp (&body, NULL); gcc_assert (taskreg_nesting_level == 0); FOR_EACH_VEC_ELT (taskreg_contexts, i, ctx) @@ -2484,35 +2484,8 @@ common_handle_option (struct gcc_options *opts, break; case OPT_foffload_: - { - const char *p = arg; - opts->x_flag_disable_hsa = true; - while (*p != 0) - { - const char *comma = strchr (p, ','); - - if ((strncmp (p, "disable", 7) == 0) - && (p[7] == ',' || p[7] == '\0')) - { - opts->x_flag_disable_hsa = true; - break; - } - - if ((strncmp (p, "hsa", 3) == 0) - && (p[3] == ',' || p[3] == '\0')) - { -#ifdef ENABLE_HSA - opts->x_flag_disable_hsa = false; -#else - sorry ("HSA has not been enabled during configuration"); -#endif - } - if (!comma) - break; - p = comma + 1; - } - break; - } + /* Deferred. 
*/ + break; #ifndef ACCEL_COMPILER case OPT_foffload_abi_: diff --git a/gcc/params.opt b/gcc/params.opt index b36eee0..f39e5d1 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -170,10 +170,6 @@ The number of most executed permilles of the profiled execution of the entire pr Common Joined UInteger Var(param_hot_bb_frequency_fraction) Init(1000) Param The denominator n of fraction 1/n of the execution frequency of the entry block of a function that a basic block of this function needs to at least have in order to be considered hot. --param=hsa-gen-debug-stores= -Common Joined UInteger Var(param_hsa_gen_debug_stores) IntegerRange(0, 1) Param -Level of hsa debug stores verbosity. - -param=inline-heuristics-hint-percent= Common Joined UInteger Var(param_inline_heuristics_hint_percent) Init(200) Optimization IntegerRange(100, 1000000) Param The scale (in percents) applied to inline-insns-single and auto limits when heuristics hints that inlining is very profitable. diff --git a/gcc/passes.def b/gcc/passes.def index 2b1e09f..c0098d7 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -153,7 +153,6 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_ipa_cp); NEXT_PASS (pass_ipa_sra); NEXT_PASS (pass_ipa_cdtor_merge); - NEXT_PASS (pass_ipa_hsa); NEXT_PASS (pass_ipa_fn_summary); NEXT_PASS (pass_ipa_inline); NEXT_PASS (pass_ipa_pure_const); @@ -402,7 +401,6 @@ along with GCC; see the file COPYING3. 
If not see NEXT_PASS (pass_gimple_isel); NEXT_PASS (pass_cleanup_cfg_post_optimizing); NEXT_PASS (pass_warn_function_noreturn); - NEXT_PASS (pass_gen_hsail); NEXT_PASS (pass_expand); diff --git a/gcc/testsuite/c-c++-common/gomp/gridify-1.c b/gcc/testsuite/c-c++-common/gomp/gridify-1.c deleted file mode 100644 index f9b03eb..0000000 --- a/gcc/testsuite/c-c++-common/gomp/gridify-1.c +++ /dev/null @@ -1,54 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target offload_hsa } */ -/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */ - -void -foo1 (int n, int *a, int workgroup_size) -{ - int i; -#pragma omp target -#pragma omp teams thread_limit(workgroup_size) -#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) - for (i = 0; i < n; i++) - a[i]++; -} - -void -foo2 (int j, int n, int *a) -{ - int i; -#pragma omp target teams -#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j) - for (i = j + 1; i < n; i++) - a[i] = i; -} - -void -foo3 (int j, int n, int *a) -{ - int i; -#pragma omp target teams -#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j) - for (i = j + 1; i < n; i += 3) - a[i] = i; -} - -void -foo4 (int j, int n, int *a) -{ -#pragma omp parallel - { - #pragma omp single - { - int i; -#pragma omp target -#pragma omp teams -#pragma omp distribute parallel for shared(a) firstprivate(n) private(i) firstprivate(j) - for (i = j + 1; i < n; i += 3) - a[i] = i; - } - } -} - - -/* { dg-final { scan-tree-dump-times "Target construct will be turned into a gridified HSA kernel" 4 "omplower" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/gridify-2.c b/gcc/testsuite/c-c++-common/gomp/gridify-2.c deleted file mode 100644 index 6b5cc9a..0000000 --- a/gcc/testsuite/c-c++-common/gomp/gridify-2.c +++ /dev/null @@ -1,66 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target offload_hsa } */ -/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */ - 
-#define BLOCK_SIZE 16 - - -void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA, - const float*B, const int LDB, const float beta, float*C, const int LDC){ - -#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N]) -#pragma omp distribute collapse(2) - for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE) - for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE) - { -// Each team has a local copy of these mini matrices - float As[BLOCK_SIZE][BLOCK_SIZE]; - float Bs[BLOCK_SIZE][BLOCK_SIZE]; -#pragma omp parallel - { - int C_row, C_col; - float Cval = 0.0; - - for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE ) - { -#pragma omp for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - C_row = C_row_start + row; - C_col = C_col_start + col; - if ((C_row < M) && (kblock + col < K)) - As[row][col] = A[(C_row*LDA)+ kblock + col]; - else - As[row][col] = 0; - if ((kblock + row < K) && C_col < N) - Bs[row][col] = B[((kblock+row)*LDB)+ C_col]; - else - Bs[row][col] = 0; - } - -#pragma omp for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - for (int e = 0; e < BLOCK_SIZE; ++e) - Cval += As[row][e] * Bs[e][col]; - } - } /* End for kblock .. 
*/ - - -#pragma omp for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - C_row = C_row_start + row; - C_col = C_col_start + col; - if ((C_row < M) && (C_col < N)) - C[(C_row*LDC)+C_col] = alpha*Cval + beta*C[(C_row*LDC)+C_col]; - - } - } /* end parallel */ - } /* end target teams distribute */ -} - -/* { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/gridify-3.c b/gcc/testsuite/c-c++-common/gomp/gridify-3.c deleted file mode 100644 index 8dbeaef..0000000 --- a/gcc/testsuite/c-c++-common/gomp/gridify-3.c +++ /dev/null @@ -1,68 +0,0 @@ -/* { dg-do compile } */ -/* { dg-require-effective-target offload_hsa } */ -/* { dg-options "-fopenmp -fdump-tree-omplower-details" } */ - -#define BLOCK_SIZE 16 - -void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA, - const float*B, const int LDB, const float beta, float*C, const int LDC) -{ -#pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N]) -#pragma omp distribute collapse(2) - for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE) - for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE) - { - float As[BLOCK_SIZE][BLOCK_SIZE]; - float Bs[BLOCK_SIZE][BLOCK_SIZE]; - float Cs[BLOCK_SIZE][BLOCK_SIZE]; - int C_row, C_col; - -#pragma omp parallel for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - Cs[row][col] = 0.0; - } - - - for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE ) - { -#pragma omp parallel for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - C_row = C_row_start + row; - C_col = C_col_start + col; - if ((C_row < M) && (kblock + col < K)) - As[row][col] = A[(C_row*LDA)+ kblock + col]; - else - As[row][col] = 0; - if ((kblock + row < K) && C_col < N) - 
Bs[row][col] = B[((kblock+row)*LDB)+ C_col]; - else - Bs[row][col] = 0; - } - -#pragma omp parallel for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - for (int e = 0; e < BLOCK_SIZE; ++e) - Cs[row][col] += As[row][e] * Bs[e][col]; - } - } /* End for kblock .. */ - - -#pragma omp parallel for collapse(2) - for (int row=0 ; row < BLOCK_SIZE ; row++) - for (int col=0 ; col < BLOCK_SIZE ; col++) - { - C_row = C_row_start + row; - C_col = C_col_start + col; - if ((C_row < M) && (C_col < N)) - C[(C_row*LDC)+C_col] = alpha*Cs[row][col] + beta*C[(C_row*LDC)+C_col]; - } - } /* End distribute */ -} - -/* { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } */ diff --git a/gcc/testsuite/c-c++-common/gomp/hsa-indirect-call-1.c b/gcc/testsuite/c-c++-common/gomp/hsa-indirect-call-1.c deleted file mode 100644 index 67ee6af..0000000 --- a/gcc/testsuite/c-c++-common/gomp/hsa-indirect-call-1.c +++ /dev/null @@ -1,24 +0,0 @@ -/* Instead of ICE, we'd like "HSA does not implement indirect calls". */ - -/* Reduced from 'libgomp.c/target-39.c'. */ - -/* { dg-require-effective-target offload_hsa } */ -/* { dg-additional-options "-Whsa" } to override '{gcc,g++}.dg/gomp/gomp.exp'. */ - -typedef void (*fnp) (void); -void f1 (void) { } -fnp f2 (void) { return f1; } -#pragma omp declare target to (f1, f2) - -int -main () -{ - #pragma omp target - { - fnp fnp = f2 (); - fnp (); /* { dg-message "note: support for HSA does not implement indirect calls" } */ - } - return 0; -} - -/* { dg-warning "could not emit HSAIL for the function" "" { target *-*-* } 0 } */ diff --git a/gcc/testsuite/g++.dg/gomp/gomp.exp b/gcc/testsuite/g++.dg/gomp/gomp.exp index 56357a9..f5c68fb 100644 --- a/gcc/testsuite/g++.dg/gomp/gomp.exp +++ b/gcc/testsuite/g++.dg/gomp/gomp.exp @@ -29,7 +29,7 @@ dg-init # Main loop. 
g++-dg-runtest [lsort [concat \ [find $srcdir/$subdir *.C] \ - [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp -Wno-hsa" + [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp" # All done. dg-finish diff --git a/gcc/testsuite/gcc.dg/gomp/gomp.exp b/gcc/testsuite/gcc.dg/gomp/gomp.exp index 2414c22..8a7f18e 100644 --- a/gcc/testsuite/gcc.dg/gomp/gomp.exp +++ b/gcc/testsuite/gcc.dg/gomp/gomp.exp @@ -31,7 +31,7 @@ dg-init # Main loop. dg-runtest [lsort [concat \ [find $srcdir/$subdir *.c] \ - [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp -Wno-hsa" + [find $srcdir/c-c++-common/gomp *.c]]] "" "-fopenmp" # All done. dg-finish diff --git a/gcc/testsuite/gfortran.dg/gomp/gomp.exp b/gcc/testsuite/gfortran.dg/gomp/gomp.exp index f3a6799..1b0fc0d 100644 --- a/gcc/testsuite/gfortran.dg/gomp/gomp.exp +++ b/gcc/testsuite/gfortran.dg/gomp/gomp.exp @@ -30,7 +30,7 @@ dg-init # Main loop. gfortran-dg-runtest [lsort \ - [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp -Wno-hsa" + [find $srcdir/$subdir *.\[fF\]{,90,95,03,08} ] ] "" "-fopenmp" # All done. dg-finish diff --git a/gcc/testsuite/gfortran.dg/gomp/gridify-1.f90 b/gcc/testsuite/gfortran.dg/gomp/gridify-1.f90 deleted file mode 100644 index 7def279..0000000 --- a/gcc/testsuite/gfortran.dg/gomp/gridify-1.f90 +++ /dev/null @@ -1,16 +0,0 @@ -! { dg-do compile } -! { dg-require-effective-target offload_hsa } -! { dg-options "-fopenmp -fdump-tree-omplower-details" } */ - -subroutine vector_square(n, a, b) - integer i, n, b(n), a(n) -!$omp target teams -!$omp distribute parallel do - do i=1,n - b(i) = a(i) * a(i) - enddo -!$omp end distribute parallel do -!$omp end target teams -end subroutine vector_square - -! 
{ dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 57eed30..ba9db0b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9858,14 +9858,6 @@ proc check_effective_target_offload_nvptx { } { } "-foffload=nvptx-none" ] } -# Return 1 if the compiler has been configured with hsa offloading. - -proc check_effective_target_offload_hsa { } { - return [check_no_compiler_messages offload_hsa assembly { - int main () {return 0;} - } "-foffload=hsa" ] -} - # Return 1 if the compiler has been configured with gcn offloading. proc check_effective_target_offload_gcn { } { diff --git a/gcc/timevar.def b/gcc/timevar.def index 2947920..7dd1e26 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -99,7 +99,6 @@ DEFTIMEVAR (TV_WHOPR_WPA_IO , "whopr wpa I/O") DEFTIMEVAR (TV_WHOPR_PARTITIONING , "whopr partitioning") DEFTIMEVAR (TV_WHOPR_LTRANS , "whopr ltrans") DEFTIMEVAR (TV_IPA_REFERENCE , "ipa reference") -DEFTIMEVAR (TV_IPA_HSA , "ipa HSA") DEFTIMEVAR (TV_IPA_PROFILE , "ipa profile") DEFTIMEVAR (TV_IPA_AUTOFDO , "auto profile") DEFTIMEVAR (TV_IPA_PURE_CONST , "ipa pure const") diff --git a/gcc/toplev.c b/gcc/toplev.c index 2bd1888..9a78af1 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -77,7 +77,6 @@ along with GCC; see the file COPYING3. If not see #include "ipa-prop.h" #include "gcse.h" #include "omp-offload.h" -#include "hsa-common.h" #include "edit-context.h" #include "tree-pass.h" #include "dumpfile.h" @@ -512,8 +511,6 @@ compile_file (void) omp_finish_file (); - hsa_output_brig (); - output_shared_constant_pool (); output_object_blocks (); finish_tm_clone_pairs (); diff --git a/gcc/tree-core.h b/gcc/tree-core.h index ba7f9ce..0e15878 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -488,10 +488,6 @@ enum omp_clause_code { /* OpenACC clause: tile ( size-expr-list ). 
*/ OMP_CLAUSE_TILE, - /* OpenMP internal-only clause to specify grid dimensions of a gridified - kernel. */ - OMP_CLAUSE__GRIDDIM_, - /* OpenACC clause: if_present. */ OMP_CLAUSE_IF_PRESENT, @@ -1557,9 +1553,6 @@ struct GTY(()) tree_omp_clause { enum omp_clause_defaultmap_kind defaultmap_kind; enum omp_clause_bind_kind bind_kind; enum omp_clause_device_type_kind device_type_kind; - /* The dimension a OMP_CLAUSE__GRIDDIM_ clause of a gridified target - construct describes. */ - unsigned int dimension; } GTY ((skip)) subcode; /* The gimplification of OMP_CLAUSE_REDUCTION_{INIT,MERGE} for omp-low's diff --git a/gcc/tree-nested.c b/gcc/tree-nested.c index 4dc5533..6ce89a7 100644 --- a/gcc/tree-nested.c +++ b/gcc/tree-nested.c @@ -1394,7 +1394,6 @@ convert_nonlocal_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: - case OMP_CLAUSE__GRIDDIM_: case OMP_CLAUSE__SIMT_: /* Anything else. */ default: @@ -2137,7 +2136,6 @@ convert_local_omp_clauses (tree *pclauses, struct walk_stmt_info *wi) case OMP_CLAUSE__LOOPTEMP_: case OMP_CLAUSE__REDUCTEMP_: case OMP_CLAUSE__SIMDUID_: - case OMP_CLAUSE__GRIDDIM_: case OMP_CLAUSE__SIMT_: /* Anything else. 
*/ default: diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 215c8f2..f01e811 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -474,7 +474,6 @@ extern gimple_opt_pass *make_pass_sanopt (gcc::context *ctxt); extern gimple_opt_pass *make_pass_oacc_kernels (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_oacc (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_oacc_kernels (gcc::context *ctxt); -extern gimple_opt_pass *make_pass_gen_hsail (gcc::context *ctxt); extern gimple_opt_pass *make_pass_warn_nonnull_compare (gcc::context *ctxt); extern gimple_opt_pass *make_pass_sprintf_length (gcc::context *ctxt); extern gimple_opt_pass *make_pass_walloca (gcc::context *ctxt); @@ -508,7 +507,6 @@ extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_devirt (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); -extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index be1ed90..655061c 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -1246,17 +1246,6 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags) pp_right_paren (pp); break; - case OMP_CLAUSE__GRIDDIM_: - pp_string (pp, "_griddim_("); - pp_unsigned_wide_integer (pp, OMP_CLAUSE__GRIDDIM__DIMENSION (clause)); - pp_colon (pp); - dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__SIZE (clause), spc, flags, - false); - pp_comma (pp); - dump_generic_node (pp, OMP_CLAUSE__GRIDDIM__GROUP (clause), spc, flags, - false); - pp_right_paren (pp); - break; case OMP_CLAUSE_IF_PRESENT: pp_string (pp, "if_present"); break; @@ -357,7 +357,6 @@ 
unsigned const char omp_clause_num_ops[] = 1, /* OMP_CLAUSE_NUM_WORKERS */ 1, /* OMP_CLAUSE_VECTOR_LENGTH */ 3, /* OMP_CLAUSE_TILE */ - 2, /* OMP_CLAUSE__GRIDDIM_ */ 0, /* OMP_CLAUSE_IF_PRESENT */ 0, /* OMP_CLAUSE_FINALIZE */ }; @@ -442,7 +441,6 @@ const char * const omp_clause_code_name[] = "num_workers", "vector_length", "tile", - "_griddim_", "if_present", "finalize", }; @@ -12098,7 +12096,6 @@ walk_tree_1 (tree *tp, walk_tree_fn func, void *data, switch (OMP_CLAUSE_CODE (*tp)) { case OMP_CLAUSE_GANG: - case OMP_CLAUSE__GRIDDIM_: WALK_SUBTREE (OMP_CLAUSE_OPERAND (*tp, 1)); /* FALLTHRU */ @@ -1779,14 +1779,6 @@ class auto_suppress_location_wrappers #define OMP_CLAUSE_TILE_COUNT(NODE) \ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 2) -#define OMP_CLAUSE__GRIDDIM__DIMENSION(NODE) \ - (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_)\ - ->omp_clause.subcode.dimension) -#define OMP_CLAUSE__GRIDDIM__SIZE(NODE) \ - OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 0) -#define OMP_CLAUSE__GRIDDIM__GROUP(NODE) \ - OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__GRIDDIM_), 1) - /* _CONDTEMP_ holding temporary with iteration count. */ #define OMP_CLAUSE__CONDTEMP__ITER(NODE) \ (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE__CONDTEMP_)->base.public_flag) |