aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin/plugin-hsa.c
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp/plugin/plugin-hsa.c')
-rw-r--r--libgomp/plugin/plugin-hsa.c1871
1 files changed, 0 insertions, 1871 deletions
diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
deleted file mode 100644
index abd3bc6..0000000
--- a/libgomp/plugin/plugin-hsa.c
+++ /dev/null
@@ -1,1871 +0,0 @@
-/* Plugin for HSAIL execution.
-
- Copyright (C) 2013-2020 Free Software Foundation, Inc.
-
- Contributed by Martin Jambor <mjambor@suse.cz> and
- Martin Liska <mliska@suse.cz>.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <pthread.h>
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#endif
-#include <stdbool.h>
-#include <hsa.h>
-#include <plugin/hsa_ext_finalize.h>
-#include <dlfcn.h>
-#include "libgomp-plugin.h"
-#include "gomp-constants.h"
-#include "secure_getenv.h"
-
-#ifdef HAVE_INTTYPES_H
-typedef uint64_t print_uint64_t;
-#else
-#define PRIu64 "lu"
-typedef unsigned long print_uint64_t;
-#endif
-
-/* As an HSA runtime is dlopened, following structure defines function
- pointers utilized by the HSA plug-in. */
-
-struct hsa_runtime_fn_info
-{
- /* HSA runtime. */
- hsa_status_t (*hsa_status_string_fn) (hsa_status_t status,
- const char **status_string);
- hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent,
- hsa_agent_info_t attribute,
- void *value);
- hsa_status_t (*hsa_init_fn) (void);
- hsa_status_t (*hsa_iterate_agents_fn)
- (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data);
- hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region,
- hsa_region_info_t attribute,
- void *value);
- hsa_status_t (*hsa_queue_create_fn)
- (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type,
- void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data),
- void *data, uint32_t private_segment_size,
- uint32_t group_segment_size, hsa_queue_t **queue);
- hsa_status_t (*hsa_agent_iterate_regions_fn)
- (hsa_agent_t agent,
- hsa_status_t (*callback)(hsa_region_t region, void *data), void *data);
- hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable);
- hsa_status_t (*hsa_executable_create_fn)
- (hsa_profile_t profile, hsa_executable_state_t executable_state,
- const char *options, hsa_executable_t *executable);
- hsa_status_t (*hsa_executable_global_variable_define_fn)
- (hsa_executable_t executable, const char *variable_name, void *address);
- hsa_status_t (*hsa_executable_load_code_object_fn)
- (hsa_executable_t executable, hsa_agent_t agent,
- hsa_code_object_t code_object, const char *options);
- hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable,
- const char *options);
- hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value,
- uint32_t num_consumers,
- const hsa_agent_t *consumers,
- hsa_signal_t *signal);
- hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size,
- void **ptr);
- hsa_status_t (*hsa_memory_free_fn) (void *ptr);
- hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal);
- hsa_status_t (*hsa_executable_get_symbol_fn)
- (hsa_executable_t executable, const char *module_name,
- const char *symbol_name, hsa_agent_t agent, int32_t call_convention,
- hsa_executable_symbol_t *symbol);
- hsa_status_t (*hsa_executable_symbol_get_info_fn)
- (hsa_executable_symbol_t executable_symbol,
- hsa_executable_symbol_info_t attribute, void *value);
- uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
- uint64_t value);
- uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
- void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
- hsa_signal_value_t value);
- void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
- hsa_signal_value_t value);
- hsa_signal_value_t (*hsa_signal_wait_acquire_fn)
- (hsa_signal_t signal, hsa_signal_condition_t condition,
- hsa_signal_value_t compare_value, uint64_t timeout_hint,
- hsa_wait_state_t wait_state_hint);
- hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal);
- hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue);
-
- /* HSA finalizer. */
- hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program,
- hsa_ext_module_t module);
- hsa_status_t (*hsa_ext_program_create_fn)
- (hsa_machine_model_t machine_model, hsa_profile_t profile,
- hsa_default_float_rounding_mode_t default_float_rounding_mode,
- const char *options, hsa_ext_program_t *program);
- hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program);
- hsa_status_t (*hsa_ext_program_finalize_fn)
- (hsa_ext_program_t program,hsa_isa_t isa,
- int32_t call_convention, hsa_ext_control_directives_t control_directives,
- const char *options, hsa_code_object_type_t code_object_type,
- hsa_code_object_t *code_object);
-};
-
-/* HSA runtime functions that are initialized in init_hsa_context. */
-
-static struct hsa_runtime_fn_info hsa_fns;
-
-/* Keep the following GOMP prefixed structures in sync with respective parts of
- the compiler. */
-
-/* Structure describing the run-time and grid properties of an HSA kernel
- lauch. */
-
-struct GOMP_kernel_launch_attributes
-{
- /* Number of dimensions the workload has. Maximum number is 3. */
- uint32_t ndim;
- /* Size of the grid in the three respective dimensions. */
- uint32_t gdims[3];
- /* Size of work-groups in the respective dimensions. */
- uint32_t wdims[3];
-};
-
-/* Collection of information needed for a dispatch of a kernel from a
- kernel. */
-
-struct GOMP_hsa_kernel_dispatch
-{
- /* Pointer to a command queue associated with a kernel dispatch agent. */
- void *queue;
- /* Pointer to reserved memory for OMP data struct copying. */
- void *omp_data_memory;
- /* Pointer to a memory space used for kernel arguments passing. */
- void *kernarg_address;
- /* Kernel object. */
- uint64_t object;
- /* Synchronization signal used for dispatch synchronization. */
- uint64_t signal;
- /* Private segment size. */
- uint32_t private_segment_size;
- /* Group segment size. */
- uint32_t group_segment_size;
- /* Number of children kernel dispatches. */
- uint64_t kernel_dispatch_count;
- /* Debug purpose argument. */
- uint64_t debug;
- /* Levels-var ICV. */
- uint64_t omp_level;
- /* Kernel dispatch structures created for children kernel dispatches. */
- struct GOMP_hsa_kernel_dispatch **children_dispatches;
- /* Number of threads. */
- uint32_t omp_num_threads;
-};
-
-/* Part of the libgomp plugin interface. Return the name of the accelerator,
- which is "hsa". */
-
-const char *
-GOMP_OFFLOAD_get_name (void)
-{
- return "hsa";
-}
-
-/* Part of the libgomp plugin interface. Return the specific capabilities the
- HSA accelerator have. */
-
-unsigned int
-GOMP_OFFLOAD_get_caps (void)
-{
- return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
-}
-
-/* Part of the libgomp plugin interface. Identify as HSA accelerator. */
-
-int
-GOMP_OFFLOAD_get_type (void)
-{
- return OFFLOAD_TARGET_TYPE_HSA;
-}
-
-/* Return the libgomp version number we're compatible with. There is
- no requirement for cross-version compatibility. */
-
-unsigned
-GOMP_OFFLOAD_version (void)
-{
- return GOMP_VERSION;
-}
-
-/* Flag to decide whether print to stderr information about what is going on.
- Set in init_debug depending on environment variables. */
-
-static bool debug;
-
-/* Flag to decide if the runtime should suppress a possible fallback to host
- execution. */
-
-static bool suppress_host_fallback;
-
-/* Flag to locate HSA runtime shared library that is dlopened
- by this plug-in. */
-
-static const char *hsa_runtime_lib;
-
-/* Flag to decide if the runtime should support also CPU devices (can be
- a simulator). */
-
-static bool support_cpu_devices;
-
-/* Initialize debug and suppress_host_fallback according to the environment. */
-
-static void
-init_enviroment_variables (void)
-{
- if (secure_getenv ("HSA_DEBUG"))
- debug = true;
- else
- debug = false;
-
- if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
- suppress_host_fallback = true;
- else
- suppress_host_fallback = false;
-
- hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
- if (hsa_runtime_lib == NULL)
- hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so";
-
- support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES");
-}
-
-/* Print a logging message with PREFIX to stderr if HSA_DEBUG value
- is set to true. */
-
-#define HSA_LOG(prefix, ...) \
- do \
- { \
- if (debug) \
- { \
- fprintf (stderr, prefix); \
- fprintf (stderr, __VA_ARGS__); \
- } \
- } \
- while (false)
-
-/* Print a debugging message to stderr. */
-
-#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
-
-/* Print a warning message to stderr. */
-
-#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
-
-/* Print HSA warning STR with an HSA STATUS code. */
-
-static void
-hsa_warn (const char *str, hsa_status_t status)
-{
- if (!debug)
- return;
-
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
-
- fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg);
-}
-
-/* Report a fatal error STR together with the HSA error corresponding to STATUS
- and terminate execution of the current process. */
-
-static void
-hsa_fatal (const char *str, hsa_status_t status)
-{
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
- GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error_msg);
-}
-
-/* Like hsa_fatal, except only report error message, and return FALSE
- for propagating error processing to outside of plugin. */
-
-static bool
-hsa_error (const char *str, hsa_status_t status)
-{
- const char *hsa_error_msg = "[unknown]";
- hsa_fns.hsa_status_string_fn (status, &hsa_error_msg);
- GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str,
- hsa_error_msg);
- return false;
-}
-
-struct hsa_kernel_description
-{
- const char *name;
- unsigned omp_data_size;
- bool gridified_kernel_p;
- unsigned kernel_dependencies_count;
- const char **kernel_dependencies;
-};
-
-struct global_var_info
-{
- const char *name;
- void *address;
-};
-
-/* Data passed by the static initializer of a compilation unit containing BRIG
- to GOMP_offload_register. */
-
-struct brig_image_desc
-{
- hsa_ext_module_t brig_module;
- const unsigned kernel_count;
- struct hsa_kernel_description *kernel_infos;
- const unsigned global_variable_count;
- struct global_var_info *global_variables;
-};
-
-struct agent_info;
-
-/* Information required to identify, finalize and run any given kernel. */
-
-struct kernel_info
-{
- /* Name of the kernel, required to locate it within the brig module. */
- const char *name;
- /* Size of memory space for OMP data. */
- unsigned omp_data_size;
- /* The specific agent the kernel has been or will be finalized for and run
- on. */
- struct agent_info *agent;
- /* The specific module where the kernel takes place. */
- struct module_info *module;
- /* Mutex enforcing that at most once thread ever initializes a kernel for
- use. A thread should have locked agent->modules_rwlock for reading before
- acquiring it. */
- pthread_mutex_t init_mutex;
- /* Flag indicating whether the kernel has been initialized and all fields
- below it contain valid data. */
- bool initialized;
- /* Flag indicating that the kernel has a problem that blocks an execution. */
- bool initialization_failed;
- /* The object to be put into the dispatch queue. */
- uint64_t object;
- /* Required size of kernel arguments. */
- uint32_t kernarg_segment_size;
- /* Required size of group segment. */
- uint32_t group_segment_size;
- /* Required size of private segment. */
- uint32_t private_segment_size;
- /* List of all kernel dependencies. */
- const char **dependencies;
- /* Number of dependencies. */
- unsigned dependencies_count;
- /* Maximum OMP data size necessary for kernel from kernel dispatches. */
- unsigned max_omp_data_size;
- /* True if the kernel is gridified. */
- bool gridified_kernel_p;
-};
-
-/* Information about a particular brig module, its image and kernels. */
-
-struct module_info
-{
- /* The next and previous module in the linked list of modules of an agent. */
- struct module_info *next, *prev;
- /* The description with which the program has registered the image. */
- struct brig_image_desc *image_desc;
-
- /* Number of kernels in this module. */
- int kernel_count;
- /* An array of kernel_info structures describing each kernel in this
- module. */
- struct kernel_info kernels[];
-};
-
-/* Information about shared brig library. */
-
-struct brig_library_info
-{
- char *file_name;
- hsa_ext_module_t image;
-};
-
-/* Description of an HSA GPU agent and the program associated with it. */
-
-struct agent_info
-{
- /* The HSA ID of the agent. Assigned when hsa_context is initialized. */
- hsa_agent_t id;
- /* Whether the agent has been initialized. The fields below are usable only
- if it has been. */
- bool initialized;
- /* The HSA ISA of this agent. */
- hsa_isa_t isa;
- /* Command queue of the agent. */
- hsa_queue_t *command_q;
- /* Kernel from kernel dispatch command queue. */
- hsa_queue_t *kernel_dispatch_command_q;
- /* The HSA memory region from which to allocate kernel arguments. */
- hsa_region_t kernarg_region;
-
- /* Read-write lock that protects kernels which are running or about to be run
- from interference with loading and unloading of images. Needs to be
- locked for reading while a kernel is being run, and for writing if the
- list of modules is manipulated (and thus the HSA program invalidated). */
- pthread_rwlock_t modules_rwlock;
- /* The first module in a linked list of modules associated with this
- kernel. */
- struct module_info *first_module;
-
- /* Mutex enforcing that only one thread will finalize the HSA program. A
- thread should have locked agent->modules_rwlock for reading before
- acquiring it. */
- pthread_mutex_t prog_mutex;
- /* Flag whether the HSA program that consists of all the modules has been
- finalized. */
- bool prog_finalized;
- /* Flag whether the program was finalized but with a failure. */
- bool prog_finalized_error;
- /* HSA executable - the finalized program that is used to locate kernels. */
- hsa_executable_t executable;
- /* List of BRIG libraries. */
- struct brig_library_info **brig_libraries;
- /* Number of loaded shared BRIG libraries. */
- unsigned brig_libraries_count;
-};
-
-/* Information about the whole HSA environment and all of its agents. */
-
-struct hsa_context_info
-{
- /* Whether the structure has been initialized. */
- bool initialized;
- /* Number of usable GPU HSA agents in the system. */
- int agent_count;
- /* Array of agent_info structures describing the individual HSA agents. */
- struct agent_info *agents;
-};
-
-/* Information about the whole HSA environment and all of its agents. */
-
-static struct hsa_context_info hsa_context;
-
-#define DLSYM_FN(function) \
- hsa_fns.function##_fn = dlsym (handle, #function); \
- if (hsa_fns.function##_fn == NULL) \
- goto dl_fail;
-
-static bool
-init_hsa_runtime_functions (void)
-{
- void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
- if (handle == NULL)
- goto dl_fail;
-
- DLSYM_FN (hsa_status_string)
- DLSYM_FN (hsa_agent_get_info)
- DLSYM_FN (hsa_init)
- DLSYM_FN (hsa_iterate_agents)
- DLSYM_FN (hsa_region_get_info)
- DLSYM_FN (hsa_queue_create)
- DLSYM_FN (hsa_agent_iterate_regions)
- DLSYM_FN (hsa_executable_destroy)
- DLSYM_FN (hsa_executable_create)
- DLSYM_FN (hsa_executable_global_variable_define)
- DLSYM_FN (hsa_executable_load_code_object)
- DLSYM_FN (hsa_executable_freeze)
- DLSYM_FN (hsa_signal_create)
- DLSYM_FN (hsa_memory_allocate)
- DLSYM_FN (hsa_memory_free)
- DLSYM_FN (hsa_signal_destroy)
- DLSYM_FN (hsa_executable_get_symbol)
- DLSYM_FN (hsa_executable_symbol_get_info)
- DLSYM_FN (hsa_queue_add_write_index_release)
- DLSYM_FN (hsa_queue_load_read_index_acquire)
- DLSYM_FN (hsa_signal_wait_acquire)
- DLSYM_FN (hsa_signal_store_relaxed)
- DLSYM_FN (hsa_signal_store_release)
- DLSYM_FN (hsa_signal_load_acquire)
- DLSYM_FN (hsa_queue_destroy)
- DLSYM_FN (hsa_ext_program_add_module)
- DLSYM_FN (hsa_ext_program_create)
- DLSYM_FN (hsa_ext_program_destroy)
- DLSYM_FN (hsa_ext_program_finalize)
- return true;
-
- dl_fail:
- HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ());
- return false;
-}
-
-/* Find kernel for an AGENT by name provided in KERNEL_NAME. */
-
-static struct kernel_info *
-get_kernel_for_agent (struct agent_info *agent, const char *kernel_name)
-{
- struct module_info *module = agent->first_module;
-
- while (module)
- {
- for (unsigned i = 0; i < module->kernel_count; i++)
- if (strcmp (module->kernels[i].name, kernel_name) == 0)
- return &module->kernels[i];
-
- module = module->next;
- }
-
- return NULL;
-}
-
-/* Return true if the agent is a GPU and acceptable of concurrent submissions
- from different threads. */
-
-static bool
-suitable_hsa_agent_p (hsa_agent_t agent)
-{
- hsa_device_type_t device_type;
- hsa_status_t status
- = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE,
- &device_type);
- if (status != HSA_STATUS_SUCCESS)
- return false;
-
- switch (device_type)
- {
- case HSA_DEVICE_TYPE_GPU:
- break;
- case HSA_DEVICE_TYPE_CPU:
- if (!support_cpu_devices)
- return false;
- break;
- default:
- return false;
- }
-
- uint32_t features = 0;
- status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE,
- &features);
- if (status != HSA_STATUS_SUCCESS
- || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH))
- return false;
- hsa_queue_type_t queue_type;
- status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE,
- &queue_type);
- if (status != HSA_STATUS_SUCCESS
- || (queue_type != HSA_QUEUE_TYPE_MULTI))
- return false;
-
- return true;
-}
-
-/* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment
- agent_count in hsa_context. */
-
-static hsa_status_t
-count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused)))
-{
- if (suitable_hsa_agent_p (agent))
- hsa_context.agent_count++;
- return HSA_STATUS_SUCCESS;
-}
-
-/* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent
- id to the describing structure in the hsa context. The index of the
- structure is pointed to by DATA, increment it afterwards. */
-
-static hsa_status_t
-assign_agent_ids (hsa_agent_t agent, void *data)
-{
- if (suitable_hsa_agent_p (agent))
- {
- int *agent_index = (int *) data;
- hsa_context.agents[*agent_index].id = agent;
- ++*agent_index;
- }
- return HSA_STATUS_SUCCESS;
-}
-
-/* Initialize hsa_context if it has not already been done.
- Return TRUE on success. */
-
-static bool
-init_hsa_context (void)
-{
- hsa_status_t status;
- int agent_index = 0;
-
- if (hsa_context.initialized)
- return true;
- init_enviroment_variables ();
- if (!init_hsa_runtime_functions ())
- {
- HSA_DEBUG ("Run-time could not be dynamically opened\n");
- return false;
- }
- status = hsa_fns.hsa_init_fn ();
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Run-time could not be initialized", status);
- HSA_DEBUG ("HSA run-time initialized\n");
- status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("HSA GPU devices could not be enumerated", status);
- HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count);
-
- hsa_context.agents
- = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count
- * sizeof (struct agent_info));
- status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index);
- if (agent_index != hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents");
- return false;
- }
- hsa_context.initialized = true;
- return true;
-}
-
-/* Callback of dispatch queues to report errors. */
-
-static void
-queue_callback (hsa_status_t status,
- hsa_queue_t *queue __attribute__ ((unused)),
- void *data __attribute__ ((unused)))
-{
- hsa_fatal ("Asynchronous queue error", status);
-}
-
-/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be
- used for kernarg allocations and if so write it to the memory pointed to by
- DATA and break the query. */
-
-static hsa_status_t
-get_kernarg_memory_region (hsa_region_t region, void *data)
-{
- hsa_status_t status;
- hsa_region_segment_t segment;
-
- status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT,
- &segment);
- if (status != HSA_STATUS_SUCCESS)
- return status;
- if (segment != HSA_REGION_SEGMENT_GLOBAL)
- return HSA_STATUS_SUCCESS;
-
- uint32_t flags;
- status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS,
- &flags);
- if (status != HSA_STATUS_SUCCESS)
- return status;
- if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG)
- {
- hsa_region_t *ret = (hsa_region_t *) data;
- *ret = region;
- return HSA_STATUS_INFO_BREAK;
- }
- return HSA_STATUS_SUCCESS;
-}
-
-/* Part of the libgomp plugin interface. Return the number of HSA devices on
- the system. */
-
-int
-GOMP_OFFLOAD_get_num_devices (void)
-{
- if (!init_hsa_context ())
- return 0;
- return hsa_context.agent_count;
-}
-
-/* Part of the libgomp plugin interface. Initialize agent number N so that it
- can be used for computation. Return TRUE on success. */
-
-bool
-GOMP_OFFLOAD_init_device (int n)
-{
- if (!init_hsa_context ())
- return false;
- if (n >= hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n);
- return false;
- }
- struct agent_info *agent = &hsa_context.agents[n];
-
- if (agent->initialized)
- return true;
-
- if (pthread_rwlock_init (&agent->modules_rwlock, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock");
- return false;
- }
- if (pthread_mutex_init (&agent->prog_mutex, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex");
- return false;
- }
-
- uint32_t queue_size;
- hsa_status_t status;
- status = hsa_fns.hsa_agent_get_info_fn (agent->id,
- HSA_AGENT_INFO_QUEUE_MAX_SIZE,
- &queue_size);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error requesting maximum queue size of the HSA agent",
- status);
- status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA,
- &agent->isa);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error querying the ISA of the agent", status);
- status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
- HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX,
- UINT32_MAX,
- &agent->command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error creating command queue", status);
-
- status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
- HSA_QUEUE_TYPE_MULTI,
- queue_callback, NULL, UINT32_MAX,
- UINT32_MAX,
- &agent->kernel_dispatch_command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error creating kernel dispatch command queue", status);
-
- agent->kernarg_region.handle = (uint64_t) -1;
- status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id,
- get_kernarg_memory_region,
- &agent->kernarg_region);
- if (agent->kernarg_region.handle == (uint64_t) -1)
- {
- GOMP_PLUGIN_error ("Could not find suitable memory region for kernel "
- "arguments");
- return false;
- }
- HSA_DEBUG ("HSA agent initialized, queue has id %llu\n",
- (long long unsigned) agent->command_q->id);
- HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n",
- (long long unsigned) agent->kernel_dispatch_command_q->id);
- agent->initialized = true;
- return true;
-}
-
-/* Verify that hsa_context has already been initialized and return the
- agent_info structure describing device number N. Return NULL on error. */
-
-static struct agent_info *
-get_agent_info (int n)
-{
- if (!hsa_context.initialized)
- {
- GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context.");
- return NULL;
- }
- if (n >= hsa_context.agent_count)
- {
- GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n);
- return NULL;
- }
- if (!hsa_context.agents[n].initialized)
- {
- GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent.");
- return NULL;
- }
- return &hsa_context.agents[n];
-}
-
-/* Insert MODULE to the linked list of modules of AGENT. */
-
-static void
-add_module_to_agent (struct agent_info *agent, struct module_info *module)
-{
- if (agent->first_module)
- agent->first_module->prev = module;
- module->next = agent->first_module;
- module->prev = NULL;
- agent->first_module = module;
-}
-
-/* Remove MODULE from the linked list of modules of AGENT. */
-
-static void
-remove_module_from_agent (struct agent_info *agent, struct module_info *module)
-{
- if (agent->first_module == module)
- agent->first_module = module->next;
- if (module->prev)
- module->prev->next = module->next;
- if (module->next)
- module->next->prev = module->prev;
-}
-
-/* Free the HSA program in agent and everything associated with it and set
- agent->prog_finalized and the initialized flags of all kernels to false.
- Return TRUE on success. */
-
-static bool
-destroy_hsa_program (struct agent_info *agent)
-{
- if (!agent->prog_finalized || agent->prog_finalized_error)
- return true;
-
- hsa_status_t status;
-
- HSA_DEBUG ("Destroying the current HSA program.\n");
-
- status = hsa_fns.hsa_executable_destroy_fn (agent->executable);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Could not destroy HSA executable", status);
-
- struct module_info *module;
- for (module = agent->first_module; module; module = module->next)
- {
- int i;
- for (i = 0; i < module->kernel_count; i++)
- module->kernels[i].initialized = false;
- }
- agent->prog_finalized = false;
- return true;
-}
-
-/* Initialize KERNEL from D and other parameters. Return true on success. */
-
-static bool
-init_basic_kernel_info (struct kernel_info *kernel,
- struct hsa_kernel_description *d,
- struct agent_info *agent,
- struct module_info *module)
-{
- kernel->agent = agent;
- kernel->module = module;
- kernel->name = d->name;
- kernel->omp_data_size = d->omp_data_size;
- kernel->gridified_kernel_p = d->gridified_kernel_p;
- kernel->dependencies_count = d->kernel_dependencies_count;
- kernel->dependencies = d->kernel_dependencies;
- if (pthread_mutex_init (&kernel->init_mutex, NULL))
- {
- GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Load BRIG module described by struct
- brig_image_desc in TARGET_DATA and return references to kernel descriptors
- in TARGET_TABLE. */
-
-int
-GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
- struct addr_pair **target_table)
-{
- if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- {
- GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
- return -1;
- }
-
- struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data;
- struct agent_info *agent;
- struct addr_pair *pair;
- struct module_info *module;
- struct kernel_info *kernel;
- int kernel_count = image_desc->kernel_count;
-
- agent = get_agent_info (ord);
- if (!agent)
- return -1;
-
- if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
- return -1;
- }
- if (agent->prog_finalized
- && !destroy_hsa_program (agent))
- return -1;
-
- HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
- pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair));
- *target_table = pair;
- module = (struct module_info *)
- GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info)
- + kernel_count * sizeof (struct kernel_info));
- module->image_desc = image_desc;
- module->kernel_count = kernel_count;
-
- kernel = &module->kernels[0];
-
- /* Allocate memory for kernel dependencies. */
- for (unsigned i = 0; i < kernel_count; i++)
- {
- pair->start = (uintptr_t) kernel;
- pair->end = (uintptr_t) (kernel + 1);
-
- struct hsa_kernel_description *d = &image_desc->kernel_infos[i];
- if (!init_basic_kernel_info (kernel, d, agent, module))
- return -1;
- kernel++;
- pair++;
- }
-
- add_module_to_agent (agent, module);
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
- return -1;
- }
- return kernel_count;
-}
-
-/* Add a shared BRIG library from a FILE_NAME to an AGENT. */
-
-static struct brig_library_info *
-add_shared_library (const char *file_name, struct agent_info *agent)
-{
- struct brig_library_info *library = NULL;
-
- void *f = dlopen (file_name, RTLD_NOW);
- void *start = dlsym (f, "__brig_start");
- void *end = dlsym (f, "__brig_end");
-
- if (start == NULL || end == NULL)
- return NULL;
-
- unsigned size = end - start;
- char *buf = (char *) GOMP_PLUGIN_malloc (size);
- memcpy (buf, start, size);
-
- library = GOMP_PLUGIN_malloc (sizeof (struct agent_info));
- library->file_name = (char *) GOMP_PLUGIN_malloc
- ((strlen (file_name) + 1));
- strcpy (library->file_name, file_name);
- library->image = (hsa_ext_module_t) buf;
-
- return library;
-}
-
-/* Release memory used for BRIG shared libraries that correspond
- to an AGENT. */
-
-static void
-release_agent_shared_libraries (struct agent_info *agent)
-{
- for (unsigned i = 0; i < agent->brig_libraries_count; i++)
- if (agent->brig_libraries[i])
- {
- free (agent->brig_libraries[i]->file_name);
- free (agent->brig_libraries[i]->image);
- free (agent->brig_libraries[i]);
- }
-
- free (agent->brig_libraries);
-}
-
-/* Create and finalize the program consisting of all loaded modules. */
-
-static void
-create_and_finalize_hsa_program (struct agent_info *agent)
-{
- hsa_status_t status;
- hsa_ext_program_t prog_handle;
- int mi = 0;
-
- if (pthread_mutex_lock (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex");
- if (agent->prog_finalized)
- goto final;
-
- status = hsa_fns.hsa_ext_program_create_fn
- (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL,
- HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT,
- NULL, &prog_handle);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not create an HSA program", status);
-
- HSA_DEBUG ("Created a finalized program\n");
-
- struct module_info *module = agent->first_module;
- while (module)
- {
- status = hsa_fns.hsa_ext_program_add_module_fn
- (prog_handle, module->image_desc->brig_module);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not add a module to the HSA program", status);
- module = module->next;
- mi++;
- }
-
- /* Load all shared libraries. */
- const char *libraries[] = { "libhsamath.so", "libhsastd.so" };
- const unsigned libraries_count = sizeof (libraries) / sizeof (const char *);
-
- agent->brig_libraries_count = libraries_count;
- agent->brig_libraries = GOMP_PLUGIN_malloc_cleared
- (sizeof (struct brig_library_info) * libraries_count);
-
- for (unsigned i = 0; i < libraries_count; i++)
- {
- struct brig_library_info *library = add_shared_library (libraries[i],
- agent);
- if (library == NULL)
- {
- HSA_WARNING ("Could not open a shared BRIG library: %s\n",
- libraries[i]);
- continue;
- }
-
- status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle,
- library->image);
- if (status != HSA_STATUS_SUCCESS)
- hsa_warn ("Could not add a shared BRIG library the HSA program",
- status);
- else
- HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n",
- libraries[i]);
- }
-
- hsa_ext_control_directives_t control_directives;
- memset (&control_directives, 0, sizeof (control_directives));
- hsa_code_object_t code_object;
- status = hsa_fns.hsa_ext_program_finalize_fn
- (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO,
- control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object);
- if (status != HSA_STATUS_SUCCESS)
- {
- hsa_warn ("Finalization of the HSA program failed", status);
- goto failure;
- }
-
- HSA_DEBUG ("Finalization done\n");
- hsa_fns.hsa_ext_program_destroy_fn (prog_handle);
-
- status
- = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL,
- HSA_EXECUTABLE_STATE_UNFROZEN,
- "", &agent->executable);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not create HSA executable", status);
-
- module = agent->first_module;
- while (module)
- {
- /* Initialize all global variables declared in the module. */
- for (unsigned i = 0; i < module->image_desc->global_variable_count; i++)
- {
- struct global_var_info *var;
- var = &module->image_desc->global_variables[i];
- status = hsa_fns.hsa_executable_global_variable_define_fn
- (agent->executable, var->name, var->address);
-
- HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name,
- var->address);
-
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not define a global variable in the HSA program",
- status);
- }
-
- module = module->next;
- }
-
- status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable,
- agent->id,
- code_object, "");
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not add a code object to the HSA executable", status);
- status = hsa_fns.hsa_executable_freeze_fn (agent->executable, "");
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not freeze the HSA executable", status);
-
- HSA_DEBUG ("Froze HSA executable with the finalized code object\n");
-
- /* If all goes good, jump to final. */
- goto final;
-
-failure:
- agent->prog_finalized_error = true;
-
-final:
- agent->prog_finalized = true;
-
- if (pthread_mutex_unlock (&agent->prog_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex");
-}
-
-/* Create kernel dispatch data structure for given KERNEL. */
-
-static struct GOMP_hsa_kernel_dispatch *
-create_single_kernel_dispatch (struct kernel_info *kernel,
- unsigned omp_data_size)
-{
- struct agent_info *agent = kernel->agent;
- struct GOMP_hsa_kernel_dispatch *shadow
- = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch));
-
- shadow->queue = agent->command_q;
- shadow->omp_data_memory
- = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL;
- unsigned dispatch_count = kernel->dependencies_count;
- shadow->kernel_dispatch_count = dispatch_count;
-
- shadow->children_dispatches
- = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow));
-
- shadow->object = kernel->object;
-
- hsa_signal_t sync_signal;
- hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Error creating the HSA sync signal", status);
-
- shadow->signal = sync_signal.handle;
- shadow->private_segment_size = kernel->private_segment_size;
- shadow->group_segment_size = kernel->group_segment_size;
-
- status
- = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region,
- kernel->kernarg_segment_size,
- &shadow->kernarg_address);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not allocate memory for HSA kernel arguments", status);
-
- return shadow;
-}
-
-/* Release data structure created for a kernel dispatch in SHADOW argument. */
-
-static void
-release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow)
-{
- HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n",
- shadow, (print_uint64_t) shadow->debug,
- (void *) (uintptr_t) shadow->debug);
-
- hsa_fns.hsa_memory_free_fn (shadow->kernarg_address);
-
- hsa_signal_t s;
- s.handle = shadow->signal;
- hsa_fns.hsa_signal_destroy_fn (s);
-
- free (shadow->omp_data_memory);
-
- for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
- release_kernel_dispatch (shadow->children_dispatches[i]);
-
- free (shadow->children_dispatches);
- free (shadow);
-}
-
-/* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used
- to calculate maximum necessary memory for OMP data allocation. */
-
-static void
-init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size)
-{
- hsa_status_t status;
- struct agent_info *agent = kernel->agent;
- hsa_executable_symbol_t kernel_symbol;
- status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
- kernel->name, agent->id,
- 0, &kernel_symbol);
- if (status != HSA_STATUS_SUCCESS)
- {
- hsa_warn ("Could not find symbol for kernel in the code object", status);
- goto failure;
- }
- HSA_DEBUG ("Located kernel %s\n", kernel->name);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not extract a kernel object from its symbol", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
- &kernel->kernarg_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel argument size", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE,
- &kernel->group_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel group segment size", status);
- status = hsa_fns.hsa_executable_symbol_get_info_fn
- (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE,
- &kernel->private_segment_size);
- if (status != HSA_STATUS_SUCCESS)
- hsa_fatal ("Could not get info about kernel private segment size",
- status);
-
- HSA_DEBUG ("Kernel structure for %s fully initialized with "
- "following segment sizes: \n", kernel->name);
- HSA_DEBUG (" group_segment_size: %u\n",
- (unsigned) kernel->group_segment_size);
- HSA_DEBUG (" private_segment_size: %u\n",
- (unsigned) kernel->private_segment_size);
- HSA_DEBUG (" kernarg_segment_size: %u\n",
- (unsigned) kernel->kernarg_segment_size);
- HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size);
- HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p);
-
- if (kernel->omp_data_size > *max_omp_data_size)
- *max_omp_data_size = kernel->omp_data_size;
-
- for (unsigned i = 0; i < kernel->dependencies_count; i++)
- {
- struct kernel_info *dependency
- = get_kernel_for_agent (agent, kernel->dependencies[i]);
-
- if (dependency == NULL)
- {
- HSA_DEBUG ("Could not find a dependency for a kernel: %s, "
- "dependency name: %s\n", kernel->name,
- kernel->dependencies[i]);
- goto failure;
- }
-
- if (dependency->dependencies_count > 0)
- {
- HSA_DEBUG ("HSA does not allow kernel dispatching code with "
- "a depth bigger than one\n");
- goto failure;
- }
-
- init_single_kernel (dependency, max_omp_data_size);
- }
-
- return;
-
-failure:
- kernel->initialization_failed = true;
-}
-
-/* Indent stream F by INDENT spaces. */
-
-static void
-indent_stream (FILE *f, unsigned indent)
-{
- fprintf (f, "%*s", indent, "");
-}
-
-/* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */
-
-static void
-print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent)
-{
- indent_stream (stderr, indent);
- fprintf (stderr, "this: %p\n", dispatch);
- indent_stream (stderr, indent);
- fprintf (stderr, "queue: %p\n", dispatch->queue);
- indent_stream (stderr, indent);
- fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory);
- indent_stream (stderr, indent);
- fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address);
- indent_stream (stderr, indent);
- fprintf (stderr, "object: %" PRIu64 "\n", (print_uint64_t) dispatch->object);
- indent_stream (stderr, indent);
- fprintf (stderr, "signal: %" PRIu64 "\n", (print_uint64_t) dispatch->signal);
- indent_stream (stderr, indent);
- fprintf (stderr, "private_segment_size: %u\n",
- dispatch->private_segment_size);
- indent_stream (stderr, indent);
- fprintf (stderr, "group_segment_size: %u\n",
- dispatch->group_segment_size);
- indent_stream (stderr, indent);
- fprintf (stderr, "children dispatches: %" PRIu64 "\n",
- (print_uint64_t) dispatch->kernel_dispatch_count);
- indent_stream (stderr, indent);
- fprintf (stderr, "omp_num_threads: %u\n",
- dispatch->omp_num_threads);
- fprintf (stderr, "\n");
-
- for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++)
- print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2);
-}
-
-/* Create kernel dispatch data structure for a KERNEL and all its
- dependencies. */
-
-static struct GOMP_hsa_kernel_dispatch *
-create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size)
-{
- struct GOMP_hsa_kernel_dispatch *shadow
- = create_single_kernel_dispatch (kernel, omp_data_size);
- shadow->omp_num_threads = 64;
- shadow->debug = 0;
- shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0;
-
- /* Create kernel dispatch data structures. We do not allow to have
- a kernel dispatch with depth bigger than one. */
- for (unsigned i = 0; i < kernel->dependencies_count; i++)
- {
- struct kernel_info *dependency
- = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]);
- shadow->children_dispatches[i]
- = create_single_kernel_dispatch (dependency, omp_data_size);
- shadow->children_dispatches[i]->queue
- = kernel->agent->kernel_dispatch_command_q;
- shadow->children_dispatches[i]->omp_level = 1;
- }
-
- return shadow;
-}
-
-/* Do all the work that is necessary before running KERNEL for the first time.
- The function assumes the program has been created, finalized and frozen by
- create_and_finalize_hsa_program. */
-
-static void
-init_kernel (struct kernel_info *kernel)
-{
- if (pthread_mutex_lock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex");
- if (kernel->initialized)
- {
- if (pthread_mutex_unlock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
- "mutex");
-
- return;
- }
-
- /* Precomputed maximum size of OMP data necessary for a kernel from kernel
- dispatch operation. */
- init_single_kernel (kernel, &kernel->max_omp_data_size);
-
- if (!kernel->initialization_failed)
- HSA_DEBUG ("\n");
-
- kernel->initialized = true;
- if (pthread_mutex_unlock (&kernel->init_mutex))
- GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization "
- "mutex");
-}
-
-/* Parse the target attributes INPUT provided by the compiler and return true
- if we should run anything all. If INPUT is NULL, fill DEF with default
- values, then store INPUT or DEF into *RESULT. */
-
-static bool
-parse_target_attributes (void **input,
- struct GOMP_kernel_launch_attributes *def,
- struct GOMP_kernel_launch_attributes **result)
-{
- if (!input)
- GOMP_PLUGIN_fatal ("No target arguments provided");
-
- bool attrs_found = false;
- while (*input)
- {
- uintptr_t id = (uintptr_t) *input;
- if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA
- && ((id & GOMP_TARGET_ARG_ID_MASK)
- == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES))
- {
- input++;
- attrs_found = true;
- break;
- }
-
- if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
- input++;
- input++;
- }
-
- if (!attrs_found)
- {
- def->ndim = 1;
- def->gdims[0] = 1;
- def->gdims[1] = 1;
- def->gdims[2] = 1;
- def->wdims[0] = 1;
- def->wdims[1] = 1;
- def->wdims[2] = 1;
- *result = def;
- HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n");
- return true;
- }
-
- struct GOMP_kernel_launch_attributes *kla;
- kla = (struct GOMP_kernel_launch_attributes *) *input;
- *result = kla;
- if (kla->ndim == 0 || kla->ndim > 3)
- GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim);
-
- HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim);
- unsigned i;
- for (i = 0; i < kla->ndim; i++)
- {
- HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i,
- kla->gdims[i], kla->wdims[i]);
- if (kla->gdims[i] == 0)
- return false;
- }
- return true;
-}
-
-/* Return the group size given the requested GROUP size, GRID size and number
- of grid dimensions NDIM. */
-
-static uint32_t
-get_group_size (uint32_t ndim, uint32_t grid, uint32_t group)
-{
- if (group == 0)
- {
- /* TODO: Provide a default via environment or device characteristics. */
- if (ndim == 1)
- group = 64;
- else if (ndim == 2)
- group = 8;
- else
- group = 4;
- }
-
- if (group > grid)
- group = grid;
- return group;
-}
-
-/* Return true if the HSA runtime can run function FN_PTR. */
-
-bool
-GOMP_OFFLOAD_can_run (void *fn_ptr)
-{
- struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- struct agent_info *agent = kernel->agent;
- create_and_finalize_hsa_program (agent);
-
- if (agent->prog_finalized_error)
- goto failure;
-
- init_kernel (kernel);
- if (kernel->initialization_failed)
- goto failure;
-
- return true;
-
-failure:
- if (suppress_host_fallback)
- GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed");
- HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n");
- return false;
-}
-
-/* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */
-
-void
-packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest)
-{
- __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE);
-}
-
-/* Run KERNEL on its agent, pass VARS to it as arguments and take
- launchattributes from KLA. */
-
-void
-run_kernel (struct kernel_info *kernel, void *vars,
- struct GOMP_kernel_launch_attributes *kla)
-{
- struct agent_info *agent = kernel->agent;
- if (pthread_rwlock_rdlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock");
-
- if (!agent->initialized)
- GOMP_PLUGIN_fatal ("Agent must be initialized");
-
- if (!kernel->initialized)
- GOMP_PLUGIN_fatal ("Called kernel must be initialized");
-
- struct GOMP_hsa_kernel_dispatch *shadow
- = create_kernel_dispatch (kernel, kernel->max_omp_data_size);
-
- if (debug)
- {
- fprintf (stderr, "\nKernel has following dependencies:\n");
- print_kernel_dispatch (shadow, 2);
- }
-
- uint64_t index
- = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1);
- HSA_DEBUG ("Got AQL index %llu\n", (long long int) index);
-
- /* Wait until the queue is not full before writing the packet. */
- while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q)
- >= agent->command_q->size)
- ;
-
- hsa_kernel_dispatch_packet_t *packet;
- packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address)
- + index % agent->command_q->size;
-
- memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4);
- packet->grid_size_x = kla->gdims[0];
- packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0],
- kla->wdims[0]);
-
- if (kla->ndim >= 2)
- {
- packet->grid_size_y = kla->gdims[1];
- packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1],
- kla->wdims[1]);
- }
- else
- {
- packet->grid_size_y = 1;
- packet->workgroup_size_y = 1;
- }
-
- if (kla->ndim == 3)
- {
- packet->grid_size_z = kla->gdims[2];
- packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2],
- kla->wdims[2]);
- }
- else
- {
- packet->grid_size_z = 1;
- packet->workgroup_size_z = 1;
- }
-
- packet->private_segment_size = kernel->private_segment_size;
- packet->group_segment_size = kernel->group_segment_size;
- packet->kernel_object = kernel->object;
- packet->kernarg_address = shadow->kernarg_address;
- hsa_signal_t s;
- s.handle = shadow->signal;
- packet->completion_signal = s;
- hsa_fns.hsa_signal_store_relaxed_fn (s, 1);
- memcpy (shadow->kernarg_address, &vars, sizeof (vars));
-
- /* PR hsa/70337. */
- size_t vars_size = sizeof (vars);
- if (kernel->kernarg_segment_size > vars_size)
- {
- if (kernel->kernarg_segment_size != vars_size
- + sizeof (struct hsa_kernel_runtime *))
- GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value");
- memcpy (packet->kernarg_address + vars_size, &shadow,
- sizeof (struct hsa_kernel_runtime *));
- }
-
- HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n");
-
- uint16_t header;
- header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
- header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
- header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
-
- HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name);
-
- packet_store_release ((uint32_t *) packet, header,
- (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS);
-
- hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal,
- index);
-
- /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for
- signal wait and signal load operations on their own and we need to
- periodically call the hsa_signal_load_acquire on completion signals of
- children kernels in the CPU to make that happen. As soon the
- limitation will be resolved, this workaround can be removed. */
-
- HSA_DEBUG ("Kernel dispatched, waiting for completion\n");
-
- /* Root signal waits with 1ms timeout. */
- while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1,
- 1000 * 1000,
- HSA_WAIT_STATE_BLOCKED) != 0)
- for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++)
- {
- hsa_signal_t child_s;
- child_s.handle = shadow->children_dispatches[i]->signal;
-
- HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n",
- (print_uint64_t) shadow->children_dispatches[i]->signal);
- hsa_fns.hsa_signal_load_acquire_fn (child_s);
- }
-
- release_kernel_dispatch (shadow);
-
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock");
-}
-
-/* Part of the libgomp plugin interface. Run a kernel on device N (the number
- is actually ignored, we assume the FN_PTR has been mapped using the correct
- device) and pass it an array of pointers in VARS as a parameter. The kernel
- is identified by FN_PTR which must point to a kernel_info structure. */
-
-void
-GOMP_OFFLOAD_run (int n __attribute__((unused)),
- void *fn_ptr, void *vars, void **args)
-{
- struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- struct GOMP_kernel_launch_attributes def;
- struct GOMP_kernel_launch_attributes *kla;
- if (!parse_target_attributes (args, &def, &kla))
- {
- HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n");
- return;
- }
- run_kernel (kernel, vars, kla);
-}
-
-/* Information to be passed to a thread running a kernel asycnronously. */
-
-struct async_run_info
-{
- int device;
- void *tgt_fn;
- void *tgt_vars;
- void **args;
- void *async_data;
-};
-
-/* Thread routine to run a kernel asynchronously. */
-
-static void *
-run_kernel_asynchronously (void *thread_arg)
-{
- struct async_run_info *info = (struct async_run_info *) thread_arg;
- int device = info->device;
- void *tgt_fn = info->tgt_fn;
- void *tgt_vars = info->tgt_vars;
- void **args = info->args;
- void *async_data = info->async_data;
-
- free (info);
- GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args);
- GOMP_PLUGIN_target_task_completion (async_data);
- return NULL;
-}
-
-/* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run
- does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it
- has finished. */
-
-void
-GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
- void **args, void *async_data)
-{
- pthread_t pt;
- struct async_run_info *info;
- HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n");
- info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info));
-
- info->device = device;
- info->tgt_fn = tgt_fn;
- info->tgt_vars = tgt_vars;
- info->args = args;
- info->async_data = async_data;
-
- int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info);
- if (err != 0)
- GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s",
- strerror (err));
- err = pthread_detach (pt);
- if (err != 0)
- GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel "
- "asynchronously: %s", strerror (err));
-}
-
-/* Deinitialize all information associated with MODULE and kernels within
- it. Return TRUE on success. */
-
-static bool
-destroy_module (struct module_info *module)
-{
- int i;
- for (i = 0; i < module->kernel_count; i++)
- if (pthread_mutex_destroy (&module->kernels[i].init_mutex))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization "
- "mutex");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Unload BRIG module described by
- struct brig_image_desc in TARGET_DATA from agent number N. Return
- TRUE on success. */
-
-bool
-GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data)
-{
- if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA)
- {
- GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin"
- " (expected %u, received %u)",
- GOMP_VERSION_HSA, GOMP_VERSION_DEV (version));
- return false;
- }
-
- struct agent_info *agent;
- agent = get_agent_info (n);
- if (!agent)
- return false;
-
- if (pthread_rwlock_wrlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock");
- return false;
- }
- struct module_info *module = agent->first_module;
- while (module)
- {
- if (module->image_desc == target_data)
- break;
- module = module->next;
- }
- if (!module)
- {
- GOMP_PLUGIN_error ("Attempt to unload an image that has never been "
- "loaded before");
- return false;
- }
-
- remove_module_from_agent (agent, module);
- if (!destroy_module (module))
- return false;
- free (module);
- if (!destroy_hsa_program (agent))
- return false;
- if (pthread_rwlock_unlock (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock");
- return false;
- }
- return true;
-}
-
-/* Part of the libgomp plugin interface. Deinitialize all information and
- status associated with agent number N. We do not attempt any
- synchronization, assuming the user and libgomp will not attempt
- deinitialization of a device that is in any way being used at the same
- time. Return TRUE on success. */
-
-bool
-GOMP_OFFLOAD_fini_device (int n)
-{
- struct agent_info *agent = get_agent_info (n);
- if (!agent)
- return false;
-
- if (!agent->initialized)
- return true;
-
- struct module_info *next_module = agent->first_module;
- while (next_module)
- {
- struct module_info *module = next_module;
- next_module = module->next;
- if (!destroy_module (module))
- return false;
- free (module);
- }
- agent->first_module = NULL;
- if (!destroy_hsa_program (agent))
- return false;
-
- release_agent_shared_libraries (agent);
-
- hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error destroying command queue", status);
- status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q);
- if (status != HSA_STATUS_SUCCESS)
- return hsa_error ("Error destroying kernel dispatch command queue", status);
- if (pthread_mutex_destroy (&agent->prog_mutex))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex");
- return false;
- }
- if (pthread_rwlock_destroy (&agent->modules_rwlock))
- {
- GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock");
- return false;
- }
- agent->initialized = false;
- return true;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-void *
-GOMP_OFFLOAD_alloc (int ord, size_t size)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because "
- "it should never be called");
- return NULL;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_free (int ord, void *ptr)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because "
- "it should never be called");
- return false;
-}
-
-/* Part of the libgomp plugin interface. Not implemented as it is not required
- for HSA. */
-
-bool
-GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
-{
- GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because "
- "it should never be called");
- return false;
-}