diff options
author | Ian Lance Taylor <iant@golang.org> | 2021-09-13 10:37:49 -0700 |
---|---|---|
committer | Ian Lance Taylor <iant@golang.org> | 2021-09-13 10:37:49 -0700 |
commit | e252b51ccde010cbd2a146485d8045103cd99533 (patch) | |
tree | e060f101cdc32bf5e520de8e5275db9d4236b74c /libgomp/plugin | |
parent | f10c7c4596dda99d2ee872c995ae4aeda65adbdf (diff) | |
parent | 104c05c5284b7822d770ee51a7d91946c7e56d50 (diff) | |
download | gcc-e252b51ccde010cbd2a146485d8045103cd99533.zip gcc-e252b51ccde010cbd2a146485d8045103cd99533.tar.gz gcc-e252b51ccde010cbd2a146485d8045103cd99533.tar.bz2 |
Merge from trunk revision 104c05c5284b7822d770ee51a7d91946c7e56d50.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- | libgomp/plugin/configfrag.ac | 7 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 67 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 25 |
3 files changed, 65 insertions, 34 deletions
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac index 1ab1777..f447def 100644 --- a/libgomp/plugin/configfrag.ac +++ b/libgomp/plugin/configfrag.ac @@ -272,10 +272,3 @@ AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC], AM_CONDITIONAL([PLUGIN_GCN], [test $PLUGIN_GCN = 1]) AC_DEFINE_UNQUOTED([PLUGIN_GCN], [$PLUGIN_GCN], [Define to 1 if the GCN plugin is built, 0 if not.]) - -if test "$HSA_RUNTIME_LIB" != ""; then - HSA_RUNTIME_LIB="$HSA_RUNTIME_LIB/" -fi - -AC_DEFINE_UNQUOTED([HSA_RUNTIME_LIB], ["$HSA_RUNTIME_LIB"], - [Define path to HSA runtime.]) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 8e6af69..9e7377c 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -29,6 +29,7 @@ /* {{{ Includes and defines */ #include "config.h" +#include "symcat.h" #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -292,7 +293,6 @@ struct copy_data void *dst; const void *src; size_t len; - bool free_src; struct goacc_asyncqueue *aq; }; @@ -1072,7 +1072,7 @@ init_environment_variables (void) hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB"); if (hsa_runtime_lib == NULL) - hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so.1"; + hsa_runtime_lib = "libhsa-runtime64.so.1"; support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES"); @@ -2914,8 +2914,6 @@ copy_data (void *data_) data->aq->agent->device_id, data->aq->id, data->len, data->src, data->dst); hsa_memory_copy_wrapper (data->dst, data->src, data->len); - if (data->free_src) - free ((void *) data->src); free (data); } @@ -2929,12 +2927,11 @@ gomp_offload_free (void *ptr) } /* Request an asynchronous data copy, to or from a device, on a given queue. - The event will be registered as a callback. If FREE_SRC is true - then the source data will be freed following the copy. */ + The event will be registered as a callback. */ static void queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, - size_t len, bool free_src) + size_t len) { if (DEBUG_QUEUES) GCN_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n", @@ -2944,7 +2941,6 @@ queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src, data->dst = dst; data->src = src; data->len = len; - data->free_src = free_src; data->aq = aq; queue_push_callback (aq, copy_data, data); } @@ -3041,10 +3037,8 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs, problem size, so let's do a reasonable number of single-worker gangs. 64 gangs matches a typical Fiji device. */ - /* NOTE: Until support for middle-end worker partitioning is merged, use 1 - for the default number of workers. */ if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */ - if (dims[1] == 0) dims[1] = 1; /* Workers. */ + if (dims[1] == 0) dims[1] = 16; /* Workers. */ /* The incoming dimensions are expressed in terms of gangs, workers, and vectors. The HSA dimensions are expressed in terms of "work-items", @@ -3311,6 +3305,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, struct kernel_info *kernel; int kernel_count = image_desc->kernel_count; unsigned var_count = image_desc->global_variable_count; + int other_count = 1; agent = get_agent_info (ord); if (!agent) @@ -3327,7 +3322,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count); GCN_DEBUG ("Encountered %u global variables in an image\n", var_count); - pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2) + GCN_DEBUG ("Expect %d other variables in an image\n", other_count); + pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2) * sizeof (struct addr_pair)); *target_table = pair; module = (struct module_info *) @@ -3402,6 +3398,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, pair++; } + GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR)); + + hsa_status_t status; + hsa_executable_symbol_t var_symbol; + status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, + STRINGX (GOMP_DEVICE_NUM_VAR), + agent->id, 0, &var_symbol); + if (status == HSA_STATUS_SUCCESS) + { + uint64_t device_num_varptr; + uint32_t device_num_varsize; + + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + &device_num_varptr); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable from its symbol", status); + status = hsa_fns.hsa_executable_symbol_get_info_fn + (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE, + &device_num_varsize); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not extract a variable size from its symbol", status); + + pair->start = device_num_varptr; + pair->end = device_num_varptr + device_num_varsize; + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + pair->start = pair->end = 0; + pair++; + /* Ensure that constructors are run first. */ struct GOMP_kernel_launch_attributes kla = { 3, @@ -3424,7 +3451,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, if (module->fini_array_func) kernel_count--; - return kernel_count + var_count; + return kernel_count + var_count + other_count; } /* Unload GCN object-code module described by struct gcn_image_desc in @@ -3647,7 +3674,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n) { struct agent_info *agent = get_agent_info (device); maybe_init_omp_async (agent); - queue_push_copy (agent->omp_async_queue, dst, src, n, false); + queue_push_copy (agent->omp_async_queue, dst, src, n); return true; } @@ -3917,15 +3944,7 @@ GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - /* The source data does not necessarily remain live until the deferred - copy happens. Taking a snapshot of the data here avoids reading - uninitialised data later, but means that (a) data is copied twice and - (b) modifications to the copied data between the "spawning" point of - the asynchronous kernel and when it is executed will not be seen. - But, that is probably correct. */ - void *src_copy = GOMP_PLUGIN_malloc (n); - memcpy (src_copy, src, n); - queue_push_copy (aq, dst, src_copy, n, true); + queue_push_copy (aq, dst, src, n); return true; } @@ -3937,7 +3956,7 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n, false); + queue_push_copy (aq, dst, src, n); return true; } diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1215212..0f16e1c 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -34,6 +34,7 @@ #define _GNU_SOURCE #include "openacc.h" #include "config.h" +#include "symcat.h" #include "libgomp-plugin.h" #include "oacc-plugin.h" #include "gomp-constants.h" @@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, CUmodule module; const char *const *var_names; const struct targ_fn_launch *fn_descs; - unsigned int fn_entries, var_entries, i, j; + unsigned int fn_entries, var_entries, other_entries, i, j; struct targ_fn_descriptor *targ_fns; struct addr_pair *targ_tbl; const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data; @@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, fn_entries = img_header->fn_num; fn_descs = img_header->fn_descs; + /* Currently, the only other entry kind is 'device number'. */ + other_entries = 1; + targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair) - * (fn_entries + var_entries)); + * (fn_entries + var_entries + other_entries)); targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor) * fn_entries); @@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, targ_tbl->end = targ_tbl->start + bytes; } + CUdeviceptr device_num_varptr; + size_t device_num_varsize; + CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr, + &device_num_varsize, module, + STRINGX (GOMP_DEVICE_NUM_VAR)); + if (r == CUDA_SUCCESS) + { + targ_tbl->start = (uintptr_t) device_num_varptr; + targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize); + } + else + /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */ + targ_tbl->start = targ_tbl->end = 0; + targ_tbl++; + nvptx_set_clocktick (module, dev); - return fn_entries + var_entries; + return fn_entries + var_entries + other_entries; } /* Unload the program described by TARGET_DATA. DEV_DATA is the |