diff options
Diffstat (limited to 'libgomp/plugin/plugin-gcn.c')
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 487 |
1 files changed, 485 insertions, 2 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 5c65778..4b42a59 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -41,7 +41,9 @@
 #include <hsa_ext_amd.h>
 #include <dlfcn.h>
 #include <signal.h>
+#define _LIBGOMP_PLUGIN_INCLUDE 1
 #include "libgomp-plugin.h"
+#undef _LIBGOMP_PLUGIN_INCLUDE
 #include "config/gcn/libgomp-gcn.h"  /* For struct output.  */
 #include "gomp-constants.h"
 #include <elf.h>
@@ -190,6 +192,8 @@ struct hsa_runtime_fn_info
   uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue,
 						    uint64_t value);
   uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue);
+  uint64_t (*hsa_queue_load_read_index_relaxed_fn) (const hsa_queue_t *queue);
+  uint64_t (*hsa_queue_load_write_index_relaxed_fn) (const hsa_queue_t *queue);
   void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal,
 				       hsa_signal_value_t value);
   void (*hsa_signal_store_release_fn) (hsa_signal_t signal,
@@ -216,6 +220,25 @@ struct hsa_runtime_fn_info
      const hsa_signal_t *dep_signals, hsa_signal_t completion_signal);
 };
 
+/* As the HIP runtime is dlopened, the following structure defines the
+   function pointers utilized by the interop feature of this plugin.
+   Add sufficient type declarations to make this work.  */
+
+typedef int hipError_t;  /* Actually an enum; 0 == success.  */
+typedef void* hipCtx_t;
+struct hipStream_s;
+typedef struct hipStream_s* hipStream_t;
+
+struct hip_runtime_fn_info
+{
+  hipError_t (*hipStreamCreate_fn) (hipStream_t *);
+  hipError_t (*hipStreamDestroy_fn) (hipStream_t);
+  hipError_t (*hipStreamSynchronize_fn) (hipStream_t);
+  hipError_t (*hipCtxGetCurrent_fn) (hipCtx_t *ctx);
+  hipError_t (*hipSetDevice_fn) (int deviceId);
+  hipError_t (*hipGetDevice_fn) (int *deviceId);
+};
+
 /* Structure describing the run-time and grid properties of an HSA kernel
    lauch.  This needs to match the format passed to GOMP_OFFLOAD_run.  */
 
@@ -553,9 +576,11 @@ struct hsa_context_info
 static struct hsa_context_info hsa_context;
 
 /* HSA runtime functions that are initialized in init_hsa_context.  */
-
 static struct hsa_runtime_fn_info hsa_fns;
 
+/* HIP runtime functions that are initialized in init_hip_runtime_functions.  */
+static struct hip_runtime_fn_info hip_fns;
+
 /* Heap space, allocated target-side, provided for use of newlib malloc.
    Each module should have it's own heap allocated.
    Beware that heap usage increases with OpenMP teams.  See also arenas.  */
@@ -578,10 +603,11 @@ static bool debug;
 
 static bool suppress_host_fallback;
 
-/* Flag to locate HSA runtime shared library that is dlopened
+/* Flag to locate HSA and HIP runtime shared libraries that are dlopened
    by this plug-in.  */
 
 static const char *hsa_runtime_lib;
+static const char *hip_runtime_lib;
 
 /* Flag to decide if the runtime should support also CPU devices (can be
    a simulator).  */
@@ -1068,6 +1094,10 @@ init_environment_variables (void)
   if (hsa_runtime_lib == NULL)
     hsa_runtime_lib = "libhsa-runtime64.so.1";
 
+  hip_runtime_lib = secure_getenv ("HIP_RUNTIME_LIB");
+  if (hip_runtime_lib == NULL)
+    hip_runtime_lib = "libamdhip64.so";
+
   support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES");
 
   const char *x = secure_getenv ("GCN_NUM_TEAMS");
@@ -1418,6 +1448,8 @@ init_hsa_runtime_functions (void)
   DLSYM_FN (hsa_executable_iterate_symbols)
   DLSYM_FN (hsa_queue_add_write_index_release)
   DLSYM_FN (hsa_queue_load_read_index_acquire)
+  DLSYM_FN (hsa_queue_load_read_index_relaxed)
+  DLSYM_FN (hsa_queue_load_write_index_relaxed)
   DLSYM_FN (hsa_signal_wait_acquire)
   DLSYM_FN (hsa_signal_store_relaxed)
   DLSYM_FN (hsa_signal_store_release)
@@ -4365,6 +4397,457 @@ unlock:
   return retval;
 }
 
+
+/* Load the HIP runtime library HIP_RUNTIME_LIB and look up the functions
+   used for interop in HIP_FNS.  The work is done on the first call only;
+   later calls just report the cached outcome.  Return true if all
+   functions are available and false otherwise.  */
+static bool
+init_hip_runtime_functions (void)
+{
+  static bool inited = false;
+  if (inited)
+    return hip_fns.hipStreamCreate_fn != NULL;
+  inited = true;
+
+  void *handle = dlopen (hip_runtime_lib, RTLD_LAZY);
+  if (handle == NULL)
+    return false;
+
+#define DLSYM_OPT_FN(function) \
+  hip_fns.function##_fn = dlsym (handle, #function)
+
+  DLSYM_OPT_FN (hipStreamCreate);
+  DLSYM_OPT_FN (hipStreamDestroy);
+  DLSYM_OPT_FN (hipStreamSynchronize);
+  DLSYM_OPT_FN (hipCtxGetCurrent);
+  DLSYM_OPT_FN (hipGetDevice);
+  DLSYM_OPT_FN (hipSetDevice);
+#undef DLSYM_OPT_FN
+
+  if (!hip_fns.hipStreamCreate_fn
+      || !hip_fns.hipStreamDestroy_fn
+      || !hip_fns.hipStreamSynchronize_fn
+      || !hip_fns.hipCtxGetCurrent_fn
+      || !hip_fns.hipGetDevice_fn
+      || !hip_fns.hipSetDevice_fn)
+    {
+      /* hipStreamCreate_fn doubles as the "HIP is usable" flag.  */
+      hip_fns.hipStreamCreate_fn = NULL;
+      return false;
+    }
+
+  return true;
+}
+
+
+/* Handle ACTION for the interop object OBJ on device number ORD.
+   'use' synchronizes OBJ's HIP stream or waits until its HSA queue is
+   empty; 'destroy' destroys the stream or, once it is empty, the queue.
+   Both do nothing if OBJ has no stream.  Any other action sets up OBJ:
+   the foreign runtime is taken from PREFER_TYPE, defaulting to HIP, with
+   HSA used instead if the HIP library cannot be loaded, and a stream or
+   queue is created if TARGETSYNC is true.  */
+void
+GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
+		      enum gomp_interop_flag action, bool targetsync,
+		      const char *prefer_type)
+{
+  if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
+      && !obj->stream)
+    return;
+  if ((action == gomp_interop_flag_destroy || action == gomp_interop_flag_use)
+      && obj->fr == omp_ifr_hsa)
+    {
+      /* Wait until the queue is empty.  */
+      bool is_empty;
+      uint64_t read_index, write_index;
+      hsa_queue_t *queue = (hsa_queue_t *) obj->stream;
+      do
+	{
+	  read_index = hsa_fns.hsa_queue_load_read_index_relaxed_fn (queue);
+	  write_index = hsa_fns.hsa_queue_load_write_index_relaxed_fn (queue);
+	  is_empty = (read_index == write_index);
+	}
+      while (!is_empty);
+
+      if (action == gomp_interop_flag_destroy)
+	{
+	  hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (queue);
+	  if (status != HSA_STATUS_SUCCESS)
+	    hsa_fatal ("Error destroying interop hsa_queue_t", status);
+	}
+      return;
+    }
+  if (action == gomp_interop_flag_destroy)
+    {
+      hipError_t err = hip_fns.hipStreamDestroy_fn ((hipStream_t) obj->stream);
+      if (err != 0)
+	GOMP_PLUGIN_fatal ("Error destroying interop hipStream_t: %d", err);
+      return;
+    }
+  if (action == gomp_interop_flag_use)
+    {
+      hipError_t err
+	= hip_fns.hipStreamSynchronize_fn ((hipStream_t) obj->stream);
+      if (err != 0)
+	GOMP_PLUGIN_fatal ("Error synchronizing interop hipStream_t: %d", err);
+      return;
+    }
+
+  bool fr_set = false;
+
+  /* Check for the preferred type; cf. parser in C/C++/Fortran or
+     dump_omp_init_prefer_type for the format.
+     Accept the first '{...}' block that specifies a 'fr' that we support.
+     Currently, no 'attr(...)' are supported.  */
+  if (prefer_type)
+    while (prefer_type[0] == (char) GOMP_INTEROP_IFR_SEPARATOR)
+      {
+	/* '{' item block starts.  */
+	prefer_type++;
+	/* 'fr(...)' block  */
+	while (prefer_type[0] != (char) GOMP_INTEROP_IFR_SEPARATOR)
+	  {
+	    omp_interop_fr_t fr = (omp_interop_fr_t) prefer_type[0];
+	    if (fr == omp_ifr_hip)
+	      {
+		obj->fr = omp_ifr_hip;
+		fr_set = true;
+	      }
+	    if (fr == omp_ifr_hsa)
+	      {
+		obj->fr = omp_ifr_hsa;
+		fr_set = true;
+	      }
+	    prefer_type++;
+	  }
+	prefer_type++;
+	/* 'attr(...)' block  */
+	while (prefer_type[0] != '\0')
+	  {
+	    /* const char *attr = prefer_type; */
+	    prefer_type += strlen (prefer_type) + 1;
+	  }
+	prefer_type++;
+	/* end of '}'.  */
+	if (fr_set)
+	  break;
+      }
+
+  /* Prefer HIP, use HSA as fallback.  The warning is only printed if GCN_DEBUG
+     is set and does not distinguish between no prefer_type or hip prefer_type
+     nor whether a later/lower preference also specifies 'hsa'.
+     The assumption is that the user code handles HSA gracefully, but likely
+     just by falling back to the host version.  On the other hand, have_hip is
+     likely true if HSA is available.  */
+  if (!fr_set || obj->fr == omp_ifr_hip)
+    {
+      bool have_hip = init_hip_runtime_functions ();
+      if (have_hip)
+	obj->fr = omp_ifr_hip;
+      else
+	{
+	  GCN_WARNING ("interop object requested, using HSA instead of HIP "
+		       "as %s could not be loaded", hip_runtime_lib);
+	  obj->fr = omp_ifr_hsa;
+	}
+    }
+
+  _Static_assert (sizeof (uint64_t) == sizeof (hsa_agent_t),
+		  "sizeof (uint64_t) == sizeof (hsa_agent_t)");
+  struct agent_info *agent = get_agent_info (ord);
+  obj->device_data = agent;
+
+  if (targetsync && obj->fr == omp_ifr_hsa)
+    {
+      hsa_status_t status;
+      /* Queue size must be (for GPUs) a power of 2 >= 40, i.e. at least 64 and
+	 maximally HSA_AGENT_INFO_QUEUE_MAX_SIZE.  Arbitrary choice: */
+      uint32_t queue_size = ASYNC_QUEUE_SIZE;
+      status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size,
+					    HSA_QUEUE_TYPE_MULTI,
+					    NULL, NULL, UINT32_MAX, UINT32_MAX,
+					    (hsa_queue_t **) &obj->stream);
+      if (status != HSA_STATUS_SUCCESS)
+	hsa_fatal ("Error creating interop hsa_queue_t", status);
+    }
+  else if (targetsync)
+    {
+      hipError_t err;
+      int dev_curr;
+      err = hip_fns.hipGetDevice_fn (&dev_curr);
+      if (!err && ord != dev_curr)
+	err = hip_fns.hipSetDevice_fn (ord);
+      if (!err)
+	err = hip_fns.hipStreamCreate_fn ((hipStream_t *) &obj->stream);
+      if (!err && ord != dev_curr)
+	err = hip_fns.hipSetDevice_fn (dev_curr);
+      if (err != 0)
+	GOMP_PLUGIN_fatal ("Error creating interop hipStream_t: %d", err);
+    }
+}
+
+/* Return the value of the integer property PROPERTY_ID of OBJ, or 0 if
+   there is none.  If RET_CODE is nonnull, it is set to omp_irc_success,
+   omp_irc_no_value, or the omp_irc_type_* code of the actual type.  */
+intptr_t
+GOMP_OFFLOAD_get_interop_int (struct interop_obj_t *obj,
+			      omp_interop_property_t property_id,
+			      omp_interop_rc_t *ret_code)
+{
+  if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+    {
+      if (ret_code)
+	*ret_code = omp_irc_no_value;  /* Hmm.  */
+      return 0;
+    }
+  switch (property_id)
+    {
+    case omp_ipr_fr_id:
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return obj->fr;
+    case omp_ipr_fr_name:
+      if (ret_code)
+	*ret_code = omp_irc_type_str;
+      return 0;
+    case omp_ipr_vendor:
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return 1;  /* amd */
+    case omp_ipr_vendor_name:
+      if (ret_code)
+	*ret_code = omp_irc_type_str;
+      return 0;
+    case omp_ipr_device_num:
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return obj->device_num;
+    case omp_ipr_platform:
+      if (ret_code)
+	*ret_code = omp_irc_no_value;
+      return 0;
+    case omp_ipr_device:
+      if (obj->fr == omp_ifr_hsa)
+	{
+	  if (ret_code)
+	    *ret_code = omp_irc_type_ptr;
+	  return 0;
+	}
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return ((struct agent_info *) obj->device_data)->device_id;
+    case omp_ipr_device_context:
+      if (ret_code && obj->fr == omp_ifr_hsa)
+	*ret_code = omp_irc_no_value;
+      else if (ret_code)
+	*ret_code = omp_irc_type_ptr;
+      return 0;
+    case omp_ipr_targetsync:
+      if (ret_code && !obj->stream)
+	*ret_code = omp_irc_no_value;
+      else if (ret_code)
+	*ret_code = omp_irc_type_ptr;
+      return 0;
+    default:
+      break;
+    }
+  __builtin_unreachable ();
+  return 0;
+}
+
+/* Return the value of the pointer property PROPERTY_ID of OBJ, or NULL if
+   there is none.  If RET_CODE is nonnull, it is set to omp_irc_success,
+   omp_irc_no_value, or the omp_irc_type_* code of the actual type.  */
+void *
+GOMP_OFFLOAD_get_interop_ptr (struct interop_obj_t *obj,
+			      omp_interop_property_t property_id,
+			      omp_interop_rc_t *ret_code)
+{
+  if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+    {
+      if (ret_code)
+	*ret_code = omp_irc_no_value;  /* Hmm.  */
+      return NULL;
+    }
+  switch (property_id)
+    {
+    case omp_ipr_fr_id:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_fr_name:
+      if (ret_code)
+	*ret_code = omp_irc_type_str;
+      return NULL;
+    case omp_ipr_vendor:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_vendor_name:
+      if (ret_code)
+	*ret_code = omp_irc_type_str;
+      return NULL;
+    case omp_ipr_device_num:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_platform:
+      if (ret_code)
+	*ret_code = omp_irc_no_value;
+      return NULL;
+    case omp_ipr_device:
+      if (obj->fr == omp_ifr_hsa)
+	{
+	  if (ret_code)
+	    *ret_code = omp_irc_success;
+	  /* hsa_agent_t is a struct containing a single uint64_t.  */
+	  return &((struct agent_info *) obj->device_data)->id;
+	}
+      else
+	{
+	  if (ret_code)
+	    *ret_code = omp_irc_type_int;
+	  return NULL;
+	}
+    case omp_ipr_device_context:
+      if (obj->fr == omp_ifr_hsa)
+	{
+	  if (ret_code)
+	    *ret_code = omp_irc_no_value;
+	  return NULL;
+	}
+      else
+	{
+	  hipCtx_t ctx;
+	  int dev_curr;
+	  int dev = ((struct agent_info *) obj->device_data)->device_id;
+	  hipError_t err;
+	  err = hip_fns.hipGetDevice_fn (&dev_curr);
+	  if (!err && dev != dev_curr)
+	    err = hip_fns.hipSetDevice_fn (dev);
+	  if (!err)
+	    err = hip_fns.hipCtxGetCurrent_fn (&ctx);
+	  if (!err && dev != dev_curr)
+	    err = hip_fns.hipSetDevice_fn (dev_curr);
+	  if (err)
+	    GOMP_PLUGIN_fatal ("Error obtaining hipCtx_t for device %d: %d",
+			       obj->device_num, err);
+	  if (ret_code)
+	    *ret_code = omp_irc_success;
+	  return ctx;
+	}
+    case omp_ipr_targetsync:
+      if (!obj->stream)
+	{
+	  if (ret_code)
+	    *ret_code = omp_irc_no_value;
+	  return NULL;
+	}
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return obj->stream;
+    default:
+      break;
+    }
+  __builtin_unreachable ();
+  return NULL;
+}
+
+/* Return the value of the string property PROPERTY_ID of OBJ, or NULL if
+   there is none.  If RET_CODE is nonnull, it is set to omp_irc_success,
+   omp_irc_no_value, or the omp_irc_type_* code of the actual type.  */
+const char *
+GOMP_OFFLOAD_get_interop_str (struct interop_obj_t *obj,
+			      omp_interop_property_t property_id,
+			      omp_interop_rc_t *ret_code)
+{
+  if (obj->fr != omp_ifr_hip && obj->fr != omp_ifr_hsa)
+    {
+      if (ret_code)
+	*ret_code = omp_irc_no_value;  /* Hmm.  */
+      return NULL;
+    }
+  switch (property_id)
+    {
+    case omp_ipr_fr_id:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_fr_name:
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      if (obj->fr == omp_ifr_hip)
+	return "hip";
+      /* Only omp_ifr_hsa is left, cf. the check at the top.  */
+      return "hsa";
+    case omp_ipr_vendor:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_vendor_name:
+      if (ret_code)
+	*ret_code = omp_irc_success;
+      return "amd";
+    case omp_ipr_device_num:
+      if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_platform:
+      if (ret_code)
+	*ret_code = omp_irc_no_value;
+      return NULL;
+    case omp_ipr_device:
+      if (ret_code && obj->fr == omp_ifr_hsa)
+	*ret_code = omp_irc_type_ptr;
+      else if (ret_code)
+	*ret_code = omp_irc_type_int;
+      return NULL;
+    case omp_ipr_device_context:
+      if (ret_code && obj->fr == omp_ifr_hsa)
+	*ret_code = omp_irc_no_value;
+      else if (ret_code)
+	*ret_code = omp_irc_type_ptr;
+      return NULL;
+    case omp_ipr_targetsync:
+      if (ret_code && !obj->stream)
+	*ret_code = omp_irc_no_value;
+      else if (ret_code)
+	*ret_code = omp_irc_type_ptr;
+      return NULL;
+    default:
+      break;
+    }
+  __builtin_unreachable ();
+  return NULL;
+}
+
+/* Return the name of the foreign-runtime type of property PROPERTY_ID of
+   OBJ, or NULL unless omp_ipr_first <= PROPERTY_ID <= omp_ipr_platform.  */
+const char *
+GOMP_OFFLOAD_get_interop_type_desc (struct interop_obj_t *obj,
+				    omp_interop_property_t property_id)
+{
+  _Static_assert (omp_ipr_targetsync == omp_ipr_first,
+		  "omp_ipr_targetsync == omp_ipr_first");
+  _Static_assert (omp_ipr_platform - omp_ipr_first + 1 == 4,
+		  "omp_ipr_platform - omp_ipr_first + 1 == 4");
+  static const char *desc_hip[] = {"N/A",		/* platform */
+				   "hipDevice_t",	/* device */
+				   "hipCtx_t",		/* device_context */
+				   "hipStream_t"};	/* targetsync */
+  static const char *desc_hsa[] = {"N/A",		/* platform */
+				   "hsa_agent_t *",	/* device */
+				   "N/A",		/* device_context */
+				   "hsa_queue_t *"};	/* targetsync */
+  if (property_id < omp_ipr_first || property_id > omp_ipr_platform)
+    return NULL;
+  if (obj->fr == omp_ifr_hip)
+    return desc_hip[omp_ipr_platform - property_id];
+  return desc_hsa[omp_ipr_platform - property_id];
+}
+
 /* }}}  */
 /* {{{ OpenMP Plugin API  */
 