about summary refs log tree commit diff
path: root/libgomp/target.c
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- libgomp/target.c 174
1 files changed, 157 insertions, 17 deletions
diff --git a/libgomp/target.c b/libgomp/target.c
index a64ee96..cda092b 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
+attribute_hidden void
+gomp_copy_dev2dev (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq,
+ void *dst, const void *src, size_t sz)
+{
+ if (__builtin_expect (aq != NULL, 0))
+ goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func,
+ "dev", dst, "dev", src, NULL, sz, aq);
+ else
+ gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst,
+ "dev", src, sz);
+}
+
static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
@@ -800,12 +813,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt,
(void *) cur_node.host_end);
}
-attribute_hidden void
+/* Update the devptr by setting it to the device address of the host pointee
+ 'attach_to'; devptr is obtained from the splay_tree_key n.
+ When the pointer is already attached or the host pointee is either
+ NULL or present in the memory map, this function returns true.
+ Otherwise, the device pointer is set to point to the host pointee and:
+ - If allow_zero_length_array_sections is set, true is returned.
+ - Else, if fail_if_not_found is set, a fatal error is issued.
+ - Otherwise, false is returned. */
+
+attribute_hidden bool
gomp_attach_pointer (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq, splay_tree mem_map,
splay_tree_key n, uintptr_t attach_to, size_t bias,
struct gomp_coalesce_buf *cbufp,
- bool allow_zero_length_array_sections)
+ bool allow_zero_length_array_sections,
+ bool fail_if_not_found)
{
struct splay_tree_key_s s;
size_t size, idx;
@@ -860,7 +883,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
sizeof (void *), true, cbufp);
- return;
+ return true;
}
s.host_start = target + bias;
@@ -869,15 +892,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
if (!tn)
{
- if (allow_zero_length_array_sections)
- /* When allowing attachment to zero-length array sections, we
- copy the host pointer when the target region is not mapped. */
- data = target;
- else
+ /* We copy the host pointer when the target region is not mapped;
+ for allow_zero_length_array_sections, that's permitted.
+ Otherwise, it depends on the context: return false in that
+ case, unless fail_if_not_found is set, which is a fatal error. */
+ if (!allow_zero_length_array_sections && fail_if_not_found)
{
gomp_mutex_unlock (&devicep->lock);
gomp_fatal ("pointer target not mapped for attach");
}
+ data = target;
}
else
data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
@@ -889,10 +913,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
sizeof (void *), true, cbufp);
+ if (!tn && !allow_zero_length_array_sections)
+ return false;
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
(void *) attach_to, (int) n->aux->attach_count[idx]);
+ return true;
}
attribute_hidden void
@@ -1587,9 +1614,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
bool zlas
= ((kind & typemask)
== GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
- gomp_attach_pointer (devicep, aq, mem_map, n,
- (uintptr_t) hostaddrs[i], sizes[i],
- cbufp, zlas);
+ /* For 'target enter data', the map clauses are split;
+ however, for more complex code with struct and
+ pointer members, the mapping and the attach can end up
+ in different sets, or the wrong mapping can end up with
+ the attach. As there is no way to know whether a
+ zero-size section like 'var->ptr[i][:0]' happened in the
+ same directive or not, the not-attached check is now
+ fully silenced for 'enter data'. */
+ if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA))
+ zlas = true;
+ if (!gomp_attach_pointer (devicep, aq, mem_map, n,
+ (uintptr_t) hostaddrs[i], sizes[i],
+ cbufp, zlas, !openmp_p))
+ {
+ /* Pointee not found; that's an error except for
+ map(var[:n]) with n == 0; the compiler adds a
+ runtime condition such that for those the kind is
+ always GOMP_MAP_ZERO_LEN_ARRAY_SECTION. */
+ for (j = i; j > 0; j--)
+ if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i]
+ && sizes[j-1] == 0
+ && (GOMP_MAP_ZERO_LEN_ARRAY_SECTION
+ == (get_kind (short_mapkind, kinds, j-1)
+ & typemask)))
+ break;
+ if (j == 0)
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("pointer target not mapped for attach");
+ }
+ }
}
else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
{
@@ -2586,6 +2641,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep,
}
}
+#define GOMP_REQUIRES_NAME_BUF_LEN \
+ sizeof ("unified_address, unified_shared_memory, " \
+ "self_maps, reverse_offload")
+
static void
gomp_requires_to_name (char *buf, size_t size, int requires_mask)
{
@@ -2634,10 +2693,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table,
if (omp_req && omp_requires_mask && omp_requires_mask != omp_req)
{
- char buf1[sizeof ("unified_address, unified_shared_memory, "
- "self_maps, reverse_offload")];
- char buf2[sizeof ("unified_address, unified_shared_memory, "
- "self_maps, reverse_offload")];
+ char buf1[GOMP_REQUIRES_NAME_BUF_LEN];
+ char buf2[GOMP_REQUIRES_NAME_BUF_LEN];
gomp_requires_to_name (buf2, sizeof (buf2),
omp_req != GOMP_REQUIRES_TARGET_USED
? omp_req : omp_requires_mask);
@@ -4948,6 +5005,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size,
return 0;
}
+static void
+omp_target_memset_int (void *ptr, int val, size_t count,
+ struct gomp_device_descr *devicep)
+{
+ if (__builtin_expect (count == 0, 0))
+ return;
+ if (devicep == NULL)
+ {
+ memset (ptr, val, count);
+ return;
+ }
+
+ gomp_mutex_lock (&devicep->lock);
+ int ret = devicep->memset_func (devicep->target_id, ptr, val, count);
+ gomp_mutex_unlock (&devicep->lock);
+ if (!ret)
+ gomp_fatal ("omp_target_memset failed");
+}
+
+void*
+omp_target_memset (void *ptr, int val, size_t count, int device_num)
+{
+ struct gomp_device_descr *devicep;
+ if (device_num == omp_initial_device
+ || device_num == gomp_get_num_devices ()
+ || (devicep = resolve_device (device_num, false)) == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ devicep = NULL;
+
+ omp_target_memset_int (ptr, val, count, devicep);
+ return ptr;
+}
+
+typedef struct
+{
+ void *ptr;
+ size_t count;
+ struct gomp_device_descr *devicep;
+ int val;
+} omp_target_memset_data;
+
+static void
+omp_target_memset_async_helper (void *args)
+{
+ omp_target_memset_data *a = args;
+ omp_target_memset_int (a->ptr, a->val, a->count, a->devicep);
+}
+
+void*
+omp_target_memset_async (void *ptr, int val, size_t count, int device_num,
+ int depobj_count, omp_depend_t *depobj_list)
+{
+ void *depend[depobj_count + 5];
+ struct gomp_device_descr *devicep;
+ unsigned flags = 0;
+ int i;
+
+ if (device_num == omp_initial_device
+ || device_num == gomp_get_num_devices ()
+ || (devicep = resolve_device (device_num, false)) == NULL
+ || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
+ || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ devicep = NULL;
+
+ omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count,
+ .devicep = devicep};
+ if (depobj_count > 0 && depobj_list != NULL)
+ {
+ flags |= GOMP_TASK_FLAG_DEPEND;
+ depend[0] = 0;
+ depend[1] = (void *) (uintptr_t) depobj_count;
+ depend[2] = depend[3] = depend[4] = 0;
+ for (i = 0; i < depobj_count; ++i)
+ depend[i + 5] = &depobj_list[i];
+ }
+
+ GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s),
+ __alignof__ (s), true, flags, depend, 0, NULL);
+ return ptr;
+}
+
int
omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
size_t size, size_t device_offset, int device_num)
@@ -5513,6 +5652,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM_OPT (async_run, async_run);
DLSYM_OPT (can_run, can_run);
DLSYM (dev2dev);
+ DLSYM (memset);
}
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
@@ -5531,6 +5671,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
|| !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
|| !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
|| !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
+ || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev)
|| !DLSYM_OPT (openacc.get_property, openacc_get_property))
{
/* Require all the OpenACC handlers if we have
@@ -5647,8 +5788,7 @@ gomp_target_init (void)
found = true;
if (found)
{
- char buf[sizeof ("unified_address, unified_shared_memory, "
- "reverse_offload")];
+ char buf[GOMP_REQUIRES_NAME_BUF_LEN];
gomp_requires_to_name (buf, sizeof (buf), omp_req);
char *name = (char *) malloc (cur_len + 1);
memcpy (name, cur, cur_len);