about summary refs log tree commit diff
path: root/libgomp/plugin
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- libgomp/plugin/configfrag.ac 7
-rw-r--r-- libgomp/plugin/plugin-gcn.c 67
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 25
3 files changed, 65 insertions, 34 deletions
diff --git a/libgomp/plugin/configfrag.ac b/libgomp/plugin/configfrag.ac
index 1ab1777..f447def 100644
--- a/libgomp/plugin/configfrag.ac
+++ b/libgomp/plugin/configfrag.ac
@@ -272,10 +272,3 @@ AC_DEFINE_UNQUOTED([PLUGIN_NVPTX_DYNAMIC], [$PLUGIN_NVPTX_DYNAMIC],
AM_CONDITIONAL([PLUGIN_GCN], [test $PLUGIN_GCN = 1])
AC_DEFINE_UNQUOTED([PLUGIN_GCN], [$PLUGIN_GCN],
[Define to 1 if the GCN plugin is built, 0 if not.])
-
-if test "$HSA_RUNTIME_LIB" != ""; then
- HSA_RUNTIME_LIB="$HSA_RUNTIME_LIB/"
-fi
-
-AC_DEFINE_UNQUOTED([HSA_RUNTIME_LIB], ["$HSA_RUNTIME_LIB"],
- [Define path to HSA runtime.])
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 8e6af69..9e7377c 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -29,6 +29,7 @@
/* {{{ Includes and defines */
#include "config.h"
+#include "symcat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -292,7 +293,6 @@ struct copy_data
void *dst;
const void *src;
size_t len;
- bool free_src;
struct goacc_asyncqueue *aq;
};
@@ -1072,7 +1072,7 @@ init_environment_variables (void)
hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB");
if (hsa_runtime_lib == NULL)
- hsa_runtime_lib = HSA_RUNTIME_LIB "libhsa-runtime64.so.1";
+ hsa_runtime_lib = "libhsa-runtime64.so.1";
support_cpu_devices = secure_getenv ("GCN_SUPPORT_CPU_DEVICES");
@@ -2914,8 +2914,6 @@ copy_data (void *data_)
data->aq->agent->device_id, data->aq->id, data->len, data->src,
data->dst);
hsa_memory_copy_wrapper (data->dst, data->src, data->len);
- if (data->free_src)
- free ((void *) data->src);
free (data);
}
@@ -2929,12 +2927,11 @@ gomp_offload_free (void *ptr)
}
/* Request an asynchronous data copy, to or from a device, on a given queue.
- The event will be registered as a callback. If FREE_SRC is true
- then the source data will be freed following the copy. */
+ The event will be registered as a callback. */
static void
queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src,
- size_t len, bool free_src)
+ size_t len)
{
if (DEBUG_QUEUES)
GCN_DEBUG ("queue_push_copy %d:%d: %zu bytes from (%p) to (%p)\n",
@@ -2944,7 +2941,6 @@ queue_push_copy (struct goacc_asyncqueue *aq, void *dst, const void *src,
data->dst = dst;
data->src = src;
data->len = len;
- data->free_src = free_src;
data->aq = aq;
queue_push_callback (aq, copy_data, data);
}
@@ -3041,10 +3037,8 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, void **hostaddrs,
problem size, so let's do a reasonable number of single-worker gangs.
64 gangs matches a typical Fiji device. */
- /* NOTE: Until support for middle-end worker partitioning is merged, use 1
- for the default number of workers. */
if (dims[0] == 0) dims[0] = get_cu_count (kernel->agent); /* Gangs. */
- if (dims[1] == 0) dims[1] = 1; /* Workers. */
+ if (dims[1] == 0) dims[1] = 16; /* Workers. */
/* The incoming dimensions are expressed in terms of gangs, workers, and
vectors. The HSA dimensions are expressed in terms of "work-items",
@@ -3311,6 +3305,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
struct kernel_info *kernel;
int kernel_count = image_desc->kernel_count;
unsigned var_count = image_desc->global_variable_count;
+ int other_count = 1;
agent = get_agent_info (ord);
if (!agent)
@@ -3327,7 +3322,8 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
GCN_DEBUG ("Encountered %d kernels in an image\n", kernel_count);
GCN_DEBUG ("Encountered %u global variables in an image\n", var_count);
- pair = GOMP_PLUGIN_malloc ((kernel_count + var_count - 2)
+ GCN_DEBUG ("Expect %d other variables in an image\n", other_count);
+ pair = GOMP_PLUGIN_malloc ((kernel_count + var_count + other_count - 2)
* sizeof (struct addr_pair));
*target_table = pair;
module = (struct module_info *)
@@ -3402,6 +3398,37 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
pair++;
}
+ GCN_DEBUG ("Looking for variable %s\n", STRINGX (GOMP_DEVICE_NUM_VAR));
+
+ hsa_status_t status;
+ hsa_executable_symbol_t var_symbol;
+ status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL,
+ STRINGX (GOMP_DEVICE_NUM_VAR),
+ agent->id, 0, &var_symbol);
+ if (status == HSA_STATUS_SUCCESS)
+ {
+ uint64_t device_num_varptr;
+ uint32_t device_num_varsize;
+
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
+ &device_num_varptr);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable from its symbol", status);
+ status = hsa_fns.hsa_executable_symbol_get_info_fn
+ (var_symbol, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_SIZE,
+ &device_num_varsize);
+ if (status != HSA_STATUS_SUCCESS)
+ hsa_fatal ("Could not extract a variable size from its symbol", status);
+
+ pair->start = device_num_varptr;
+ pair->end = device_num_varptr + device_num_varsize;
+ }
+ else
+ /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
+ pair->start = pair->end = 0;
+ pair++;
+
/* Ensure that constructors are run first. */
struct GOMP_kernel_launch_attributes kla =
{ 3,
@@ -3424,7 +3451,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
if (module->fini_array_func)
kernel_count--;
- return kernel_count + var_count;
+ return kernel_count + var_count + other_count;
}
/* Unload GCN object-code module described by struct gcn_image_desc in
@@ -3647,7 +3674,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n)
{
struct agent_info *agent = get_agent_info (device);
maybe_init_omp_async (agent);
- queue_push_copy (agent->omp_async_queue, dst, src, n, false);
+ queue_push_copy (agent->omp_async_queue, dst, src, n);
return true;
}
@@ -3917,15 +3944,7 @@ GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- /* The source data does not necessarily remain live until the deferred
- copy happens. Taking a snapshot of the data here avoids reading
- uninitialised data later, but means that (a) data is copied twice and
- (b) modifications to the copied data between the "spawning" point of
- the asynchronous kernel and when it is executed will not be seen.
- But, that is probably correct. */
- void *src_copy = GOMP_PLUGIN_malloc (n);
- memcpy (src_copy, src, n);
- queue_push_copy (aq, dst, src_copy, n, true);
+ queue_push_copy (aq, dst, src, n);
return true;
}
@@ -3937,7 +3956,7 @@ GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
{
struct agent_info *agent = get_agent_info (device);
assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n, false);
+ queue_push_copy (aq, dst, src, n);
return true;
}
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 1215212..0f16e1c 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -34,6 +34,7 @@
#define _GNU_SOURCE
#include "openacc.h"
#include "config.h"
+#include "symcat.h"
#include "libgomp-plugin.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
@@ -1265,7 +1266,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
CUmodule module;
const char *const *var_names;
const struct targ_fn_launch *fn_descs;
- unsigned int fn_entries, var_entries, i, j;
+ unsigned int fn_entries, var_entries, other_entries, i, j;
struct targ_fn_descriptor *targ_fns;
struct addr_pair *targ_tbl;
const nvptx_tdata_t *img_header = (const nvptx_tdata_t *) target_data;
@@ -1295,8 +1296,11 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
fn_entries = img_header->fn_num;
fn_descs = img_header->fn_descs;
+ /* Currently, the only other entry kind is 'device number'. */
+ other_entries = 1;
+
targ_tbl = GOMP_PLUGIN_malloc (sizeof (struct addr_pair)
- * (fn_entries + var_entries));
+ * (fn_entries + var_entries + other_entries));
targ_fns = GOMP_PLUGIN_malloc (sizeof (struct targ_fn_descriptor)
* fn_entries);
@@ -1345,9 +1349,24 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data,
targ_tbl->end = targ_tbl->start + bytes;
}
+ CUdeviceptr device_num_varptr;
+ size_t device_num_varsize;
+ CUresult r = CUDA_CALL_NOCHECK (cuModuleGetGlobal, &device_num_varptr,
+ &device_num_varsize, module,
+ STRINGX (GOMP_DEVICE_NUM_VAR));
+ if (r == CUDA_SUCCESS)
+ {
+ targ_tbl->start = (uintptr_t) device_num_varptr;
+ targ_tbl->end = (uintptr_t) (device_num_varptr + device_num_varsize);
+ }
+ else
+ /* The 'GOMP_DEVICE_NUM_VAR' variable was not in this image. */
+ targ_tbl->start = targ_tbl->end = 0;
+ targ_tbl++;
+
nvptx_set_clocktick (module, dev);
- return fn_entries + var_entries;
+ return fn_entries + var_entries + other_entries;
}
/* Unload the program described by TARGET_DATA. DEV_DATA is the