diff options
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- | libgomp/target.c | 174 |
1 files changed, 157 insertions, 17 deletions
diff --git a/libgomp/target.c b/libgomp/target.c index a64ee96..cda092b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -461,6 +461,19 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } +attribute_hidden void +gomp_copy_dev2dev (struct gomp_device_descr *devicep, + struct goacc_asyncqueue *aq, + void *dst, const void *src, size_t sz) +{ + if (__builtin_expect (aq != NULL, 0)) + goacc_device_copy_async (devicep, devicep->openacc.async.dev2dev_func, + "dev", dst, "dev", src, NULL, sz, aq); + else + gomp_device_copy (devicep, devicep->dev2dev_func, "dev", dst, + "dev", src, sz); +} + static void gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr) { @@ -800,12 +813,22 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, (void *) cur_node.host_end); } -attribute_hidden void +/* Update the devptr by setting it to the device address of the host pointee + 'attach_to'; devptr is obtained from the splay_tree_key n. + When the pointer is already attached or the host pointee is either + NULL or in memory map, this function returns true. + Otherwise, the device pointer is set to point to the host pointee and: + - If allow_zero_length_array_sections is set, true is returned. + - Else, if fail_if_not_found is set, a fatal error is issued. + - Otherwise, false is returned. 
*/ + +attribute_hidden bool gomp_attach_pointer (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, splay_tree mem_map, splay_tree_key n, uintptr_t attach_to, size_t bias, struct gomp_coalesce_buf *cbufp, - bool allow_zero_length_array_sections) + bool allow_zero_length_array_sections, + bool fail_if_not_found) { struct splay_tree_key_s s; size_t size, idx; @@ -860,7 +883,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); - return; + return true; } s.host_start = target + bias; @@ -869,15 +892,16 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if (!tn) { - if (allow_zero_length_array_sections) - /* When allowing attachment to zero-length array sections, we - copy the host pointer when the target region is not mapped. */ - data = target; - else + /* We copy the host pointer when the target region is not mapped; + for allow_zero_length_array_sections, that's permitted. + Otherwise, it depends on the context. Return false in that + case, unless fail_if_not_found. 
*/ + if (!allow_zero_length_array_sections && fail_if_not_found) { gomp_mutex_unlock (&devicep->lock); gomp_fatal ("pointer target not mapped for attach"); } + data = target; } else data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start; @@ -889,10 +913,13 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, sizeof (void *), true, cbufp); + if (!tn && !allow_zero_length_array_sections) + return false; } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, (void *) attach_to, (int) n->aux->attach_count[idx]); + return true; } attribute_hidden void @@ -1587,9 +1614,37 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, bool zlas = ((kind & typemask) == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); - gomp_attach_pointer (devicep, aq, mem_map, n, - (uintptr_t) hostaddrs[i], sizes[i], - cbufp, zlas); + /* For 'target enter data', the map clauses are split; + however, for more complex code with struct and + pointer members, the mapping and the attach can end up + in different sets; or the wrong mapping with the + attach. As there is no way to know whether a size + zero like 'var->ptr[i][:0]' happened in the same + directive or not, the not-attached check is now + fully silenced for 'enter data'. */ + if (openmp_p && (pragma_kind & GOMP_MAP_VARS_ENTER_DATA)) + zlas = true; + if (!gomp_attach_pointer (devicep, aq, mem_map, n, + (uintptr_t) hostaddrs[i], sizes[i], + cbufp, zlas, !openmp_p)) + { + /* Pointee not found; that's an error except for + map(var[:n]) with n == 0; the compiler adds a + runtime condition such that for those the kind is + always GOMP_MAP_ZERO_LEN_ARRAY_SECTION. 
*/ + for (j = i; j > 0; j--) + if (*(void**) hostaddrs[i] == hostaddrs[j-1] - sizes[i] + && sizes[j-1] == 0 + && (GOMP_MAP_ZERO_LEN_ARRAY_SECTION + == (get_kind (short_mapkind, kinds, j-1) + & typemask))) + break; + if (j == 0) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("pointer target not mapped for attach"); + } + } } else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) { @@ -2586,6 +2641,10 @@ gomp_unload_image_from_device (struct gomp_device_descr *devicep, } } +#define GOMP_REQUIRES_NAME_BUF_LEN \ + sizeof ("unified_address, unified_shared_memory, " \ + "self_maps, reverse_offload") + static void gomp_requires_to_name (char *buf, size_t size, int requires_mask) { @@ -2634,10 +2693,8 @@ GOMP_offload_register_ver (unsigned version, const void *host_table, if (omp_req && omp_requires_mask && omp_requires_mask != omp_req) { - char buf1[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; - char buf2[sizeof ("unified_address, unified_shared_memory, " - "self_maps, reverse_offload")]; + char buf1[GOMP_REQUIRES_NAME_BUF_LEN]; + char buf2[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf2, sizeof (buf2), omp_req != GOMP_REQUIRES_TARGET_USED ? 
omp_req : omp_requires_mask); @@ -4948,6 +5005,88 @@ omp_target_memcpy_rect_async (void *dst, const void *src, size_t element_size, return 0; } +static void +omp_target_memset_int (void *ptr, int val, size_t count, + struct gomp_device_descr *devicep) +{ + if (__builtin_expect (count == 0, 0)) + return; + if (devicep == NULL) + { + memset (ptr, val, count); + return; + } + + gomp_mutex_lock (&devicep->lock); + int ret = devicep->memset_func (devicep->target_id, ptr, val, count); + gomp_mutex_unlock (&devicep->lock); + if (!ret) + gomp_fatal ("omp_target_memset failed"); +} + +void* +omp_target_memset (void *ptr, int val, size_t count, int device_num) +{ + struct gomp_device_descr *devicep; + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_int (ptr, val, count, devicep); + return ptr; +} + +typedef struct +{ + void *ptr; + size_t count; + struct gomp_device_descr *devicep; + int val; +} omp_target_memset_data; + +static void +omp_target_memset_async_helper (void *args) +{ + omp_target_memset_data *a = args; + omp_target_memset_int (a->ptr, a->val, a->count, a->devicep); +} + +void* +omp_target_memset_async (void *ptr, int val, size_t count, int device_num, + int depobj_count, omp_depend_t *depobj_list) +{ + void *depend[depobj_count + 5]; + struct gomp_device_descr *devicep; + unsigned flags = 0; + int i; + + if (device_num == omp_initial_device + || device_num == gomp_get_num_devices () + || (devicep = resolve_device (device_num, false)) == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + devicep = NULL; + + omp_target_memset_data s = {.ptr = ptr, .val = val, .count = count, + .devicep = devicep}; + if (depobj_count > 0 && depobj_list != NULL) + 
{ + flags |= GOMP_TASK_FLAG_DEPEND; + depend[0] = 0; + depend[1] = (void *) (uintptr_t) depobj_count; + depend[2] = depend[3] = depend[4] = 0; + for (i = 0; i < depobj_count; ++i) + depend[i + 5] = &depobj_list[i]; + } + + GOMP_task (omp_target_memset_async_helper, &s, NULL, sizeof (s), + __alignof__ (s), true, flags, depend, 0, NULL); + return ptr; +} + int omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, size_t size, size_t device_offset, int device_num) @@ -5513,6 +5652,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM_OPT (async_run, async_run); DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); + DLSYM (memset); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) { @@ -5531,6 +5671,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) + || !DLSYM_OPT (openacc.async.dev2dev, openacc_async_dev2dev) || !DLSYM_OPT (openacc.get_property, openacc_get_property)) { /* Require all the OpenACC handlers if we have @@ -5647,8 +5788,7 @@ gomp_target_init (void) found = true; if (found) { - char buf[sizeof ("unified_address, unified_shared_memory, " - "reverse_offload")]; + char buf[GOMP_REQUIRES_NAME_BUF_LEN]; gomp_requires_to_name (buf, sizeof (buf), omp_req); char *name = (char *) malloc (cur_len + 1); memcpy (name, cur, cur_len); |