diff options
Diffstat (limited to 'libgomp/oacc-mem.c')
-rw-r--r-- | libgomp/oacc-mem.c | 130 |
1 files changed, 112 insertions, 18 deletions
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 0482ed3..e40b41b 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -171,21 +171,22 @@ acc_free (void *d) } static void -memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, - const char *libfnname) +memcpy_tofrom_device (bool dev_to, bool dev_from, void *dst, void *src, + size_t s, int async, const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ struct goacc_thread *thr = goacc_thread (); assert (thr && thr->dev); + if (s == 0) + return; if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - if (from) - memmove (h, d, s); - else - memmove (d, h, s); + if (src == dst) + return; + memcpy (dst, src, s); return; } @@ -199,10 +200,15 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, } goacc_aq aq = get_goacc_asyncqueue (async); - if (from) - gomp_copy_dev2host (thr->dev, aq, h, d, s); + if (dev_to && dev_from) + { + if (dst != src) + gomp_copy_dev2dev (thr->dev, aq, dst, src, s); + } + else if (dev_from) + gomp_copy_dev2host (thr->dev, aq, dst, src, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, dst, src, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -214,25 +220,37 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, void acc_memcpy_to_device (void *d, void *h, size_t s) { - memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) { - memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (true, false, d, h, s, async, __FUNCTION__); } void acc_memcpy_from_device (void *h, void *d, size_t s) { - memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, acc_async_sync, __FUNCTION__); } void acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) { - memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); + memcpy_tofrom_device (false, true, h, d, s, async, __FUNCTION__); +} + +void +acc_memcpy_device (void *dst, void *src, size_t s) +{ + memcpy_tofrom_device (true, true, dst, src, s, acc_async_sync, __FUNCTION__); +} + +void +acc_memcpy_device_async (void *dst, void *src, size_t s, int async) +{ + memcpy_tofrom_device (true, true, dst, src, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL @@ -403,7 +421,7 @@ acc_map_data (void *h, void *d, size_t s) struct target_mem_desc *tgt = goacc_map_vars (acc_dev, NULL, mapnum, &hostaddrs, &devaddrs, &sizes, - &kinds, true, GOMP_MAP_VARS_ENTER_DATA); + &kinds, NULL, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); splay_tree_key n = tgt->list[0].key; @@ -568,7 +586,7 @@ goacc_enter_datum (void **hostaddrs, size_t *sizes, void *kinds, int async) struct target_mem_desc *tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, - kinds, true, GOMP_MAP_VARS_ENTER_DATA); + kinds, NULL, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); assert (tgt->list_count == 1); n = tgt->list[0].key; @@ -925,6 +943,35 @@ acc_update_self_async (void *h, size_t s, int async) update_dev_host (0, h, s, async); } +/* Implement "declare allocate" and "declare deallocate" operations. The + device lock must not be held before calling this function. */ + +static void +gomp_acc_declare_allocate (bool allocate, bool pointer, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + gomp_debug (0, " %s: processing\n", __FUNCTION__); + + if (allocate) + { + /* Allocate memory for the array data. */ + uintptr_t data = (uintptr_t) acc_create (hostaddrs[0], sizes[0]); + + if (pointer) + { + /* Update the PSET. */ + acc_update_device (hostaddrs[1], sizes[1]); + void *pset = acc_deviceptr (hostaddrs[1]); + acc_memcpy_to_device (pset, &data, sizeof (uintptr_t)); + } + } + else + /* Deallocate memory for the array data. */ + acc_delete (hostaddrs[0], sizes[0]); + + gomp_debug (0, " %s: end\n", __FUNCTION__); +} + void acc_attach_async (void **hostaddr, int async) { @@ -1056,6 +1103,28 @@ find_group_last (int pos, size_t mapnum, size_t *sizes, unsigned short *kinds) case GOMP_MAP_ATTACH: break; + case GOMP_MAP_DECLARE_ALLOCATE: + case GOMP_MAP_DECLARE_DEALLOCATE: + { + /* The "declare allocate" and "declare deallocate" mappings can be + used to specify either a scalar allocatable (which just appears as + GOMP_MAP_DECLARE_{ALLOCATE,DEALLOCATE} by itself), or an array + allocatable (which appears as that directive followed by a + GOMP_MAP_TO_PSET and one (or more?) GOMP_MAP_POINTER mappings. */ + if (pos + 1 >= mapnum) + break; + + unsigned char kind1 = kinds[pos + 1] & 0xff; + if (kind1 != GOMP_MAP_TO_PSET) + break; + + pos++; + + while (pos + 1 < mapnum && (kinds[pos + 1] & 0xff) == GOMP_MAP_POINTER) + pos++; + } + break; + default: /* GOMP_MAP_ALWAYS_POINTER can only appear directly after some other mapping. */ @@ -1121,7 +1190,14 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, n = lookup_host (acc_dev, hostaddrs[i], size); - if (n && struct_p) + if ((kinds[i] & 0xff) == GOMP_MAP_DECLARE_ALLOCATE) + { + gomp_mutex_unlock (&acc_dev->lock); + gomp_acc_declare_allocate (true, group_last > i, &hostaddrs[i], + &sizes[i], &kinds[i]); + gomp_mutex_lock (&acc_dev->lock); + } + else if (n && struct_p) { for (size_t j = i + 1; j <= group_last; j++) { @@ -1206,7 +1282,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, gomp_mutex_unlock (&acc_dev->lock); struct target_mem_desc *tgt_ __attribute__((unused)) = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, + &sizes[i], &kinds[i], NULL, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt_ == NULL); gomp_mutex_lock (&acc_dev->lock); @@ -1257,7 +1333,7 @@ goacc_enter_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, struct target_mem_desc *tgt = goacc_map_vars (acc_dev, aq, groupnum, &hostaddrs[i], NULL, - &sizes[i], &kinds[i], true, + &sizes[i], &kinds[i], NULL, true, GOMP_MAP_VARS_ENTER_DATA); assert (tgt); @@ -1365,6 +1441,24 @@ goacc_exit_data_internal (struct gomp_device_descr *acc_dev, size_t mapnum, reference counts ('n->refcount', 'n->dynamic_refcount'). */ break; + case GOMP_MAP_DECLARE_DEALLOCATE: + { + bool deallocate_pointer + = i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_TO_PSET; + gomp_mutex_unlock (&acc_dev->lock); + gomp_acc_declare_allocate (false, deallocate_pointer, + &hostaddrs[i], &sizes[i], &kinds[i]); + gomp_mutex_lock (&acc_dev->lock); + if (deallocate_pointer) + { + i++; + while (i + 1 < mapnum + && (kinds[i + 1] & 0xff) == GOMP_MAP_POINTER) + i++; + } + } + break; + default: gomp_fatal (">>>> goacc_exit_data_internal UNHANDLED kind 0x%.2x", kind); |