aboutsummaryrefslogtreecommitdiff
path: root/libgomp/target.c
diff options
context:
space:
mode:
author Julian Brown <julian@codesourcery.com> 2021-06-29 16:42:03 -0700
committer Thomas Schwinge <thomas@codesourcery.com> 2021-07-27 11:16:27 +0200
commit 9c41f5b9cddd93f1b56eb71bff87b255d37d16f4 (patch)
tree 575141d79a9a03bddcfef0c5eb5c6abeae5f1ad4 /libgomp/target.c
parent 88c40c36db8a52d2c630aa61ee54e33908e9daec (diff)
download gcc-9c41f5b9cddd93f1b56eb71bff87b255d37d16f4.zip
gcc-9c41f5b9cddd93f1b56eb71bff87b255d37d16f4.tar.gz
gcc-9c41f5b9cddd93f1b56eb71bff87b255d37d16f4.tar.bz2
Fix OpenACC "ephemeral" asynchronous host-to-device copies
This patch fixes several places in libgomp/target.c where "ephemeral" data
(on the stack or in temporary heap locations) may be used as the source of
an asynchronous host-to-device copy that may not complete before the host
data disappears.

An existing, but flawed, workaround for this problem in the AMD GCN libgomp
offloading plugin is currently present on mainline, and was posted for the
og9 branch here:

  https://gcc.gnu.org/legacy-ml/gcc-patches/2019-08/msg00901.html

and previous versions of this patch were posted here (for mainline/og9):

  https://gcc.gnu.org/legacy-ml/gcc-patches/2019-11/msg01482.html
  https://gcc.gnu.org/legacy-ml/gcc-patches/2019-09/msg01026.html

libgomp/
  * libgomp.h (gomp_copy_host2dev): Update prototype.
  * oacc-mem.c (memcpy_tofrom_device, update_dev_host): Add new
  argument to gomp_copy_host2dev (false).
  * plugin/plugin-gcn.c (struct copy_data): Remove free_src field.
  (copy_data): Don't free src.
  (queue_push_copy): Remove free_src handling.
  (GOMP_OFFLOAD_dev2dev): Update call to queue_push_copy.
  (GOMP_OFFLOAD_openacc_async_host2dev): Remove source-data
  snapshotting.
  (GOMP_OFFLOAD_openacc_async_dev2host): Update call to
  queue_push_copy.
  * target.c (goacc_device_copy_async): Add SRCADDR_ORIG parameter.
  (gomp_copy_host2dev): Add EPHEMERAL parameter.  Snapshot source
  data when true, and set up deferred freeing of temporary buffer.
  (gomp_copy_dev2host): Update call to goacc_device_copy_async.
  (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer)
  (gomp_detach_pointer, gomp_map_vars_internal, gomp_update): Update
  calls to gomp_copy_host2dev with appropriate ephemeral argument.
  * testsuite/libgomp.oacc-c-c++-common/async-data-1-1.c: Remove
  XFAIL.

Co-Authored-By: Thomas Schwinge <thomas@codesourcery.com>
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- libgomp/target.c 77
1 files changed, 56 insertions, 21 deletions
diff --git a/libgomp/target.c b/libgomp/target.c
index bb09d50..5576e57 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -214,13 +214,24 @@ goacc_device_copy_async (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *),
const char *dst, void *dstaddr,
const char *src, const void *srcaddr,
+ const void *srcaddr_orig,
size_t size, struct goacc_asyncqueue *aq)
{
if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
{
gomp_mutex_unlock (&devicep->lock);
- gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
- src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+ if (srcaddr_orig && srcaddr_orig != srcaddr)
+ gomp_fatal ("Copying of %s object [%p..%p)"
+ " via buffer %s object [%p..%p)"
+ " to %s object [%p..%p) failed",
+ src, srcaddr_orig, srcaddr_orig + size,
+ src, srcaddr, srcaddr + size,
+ dst, dstaddr, dstaddr + size);
+ else
+ gomp_fatal ("Copying of %s object [%p..%p)"
+ " to %s object [%p..%p) failed",
+ src, srcaddr, srcaddr + size,
+ dst, dstaddr, dstaddr + size);
}
}
@@ -317,11 +328,16 @@ gomp_to_device_kind_p (int kind)
}
}
+/* Copy host memory to an offload device. In asynchronous mode (if AQ is
+ non-NULL), when the source data is on the stack or may otherwise be
+ deallocated before the asynchronous copy takes place, EPHEMERAL must be
+ passed as TRUE. */
+
attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
struct goacc_asyncqueue *aq,
void *d, const void *h, size_t sz,
- struct gomp_coalesce_buf *cbuf)
+ bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
if (cbuf)
{
@@ -349,8 +365,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
}
}
if (__builtin_expect (aq != NULL, 0))
- goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
- "dev", d, "host", h, sz, aq);
+ {
+ void *h_buf = (void *) h;
+ if (ephemeral)
+ {
+ /* We're queueing up an asynchronous copy from data that may
+ disappear before the transfer takes place (e.g. because it is a
+ stack local in a function that is no longer executing). Make a
+ copy of the data into a temporary buffer in those cases. */
+ h_buf = gomp_malloc (sz);
+ memcpy (h_buf, h, sz);
+ }
+ goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+ "dev", d, "host", h_buf, h, sz, aq);
+ if (ephemeral)
+ /* Free temporary buffer once the transfer has completed. */
+ devicep->openacc.async.queue_callback_func (aq, free, h_buf);
+ }
else
gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
@@ -362,7 +393,7 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep,
{
if (__builtin_expect (aq != NULL, 0))
goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
- "host", h, "dev", d, sz, aq);
+ "host", h, "dev", d, NULL, sz, aq);
else
gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
@@ -521,7 +552,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
- newn->host_end - newn->host_start, cbuf);
+ newn->host_end - newn->host_start, false, cbuf);
gomp_increment_refcount (oldn, refcount_set);
}
@@ -548,8 +579,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
cur_node.tgt_offset = (uintptr_t) NULL;
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset,
- sizeof (void *), cbuf);
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ true, cbuf);
return;
}
/* Add bias to the pointer value. */
@@ -569,7 +600,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
- (void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
+ (void *) &cur_node.tgt_offset, sizeof (void *),
+ true, cbuf);
}
static void
@@ -702,7 +734,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep,
(void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
- sizeof (void *), cbufp);
+ sizeof (void *), true, cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -755,7 +787,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep,
(void *) target);
gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
- sizeof (void *), cbufp);
+ sizeof (void *), true, cbufp);
}
else
gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
@@ -1218,7 +1250,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
len = sizes[i];
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + tgt_size),
- (void *) hostaddrs[i], len, cbufp);
+ (void *) hostaddrs[i], len, false, cbufp);
tgt_size += len;
continue;
case GOMP_MAP_FIRSTPRIVATE_INT:
@@ -1312,7 +1344,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
+ cur_node.host_start
- n->host_start),
(void *) &cur_node.tgt_offset,
- sizeof (void *), cbufp);
+ sizeof (void *), true, cbufp);
cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start - n->host_start;
continue;
@@ -1450,7 +1482,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
- k->host_end - k->host_start, cbufp);
+ k->host_end - k->host_start,
+ false, cbufp);
break;
case GOMP_MAP_POINTER:
gomp_map_pointer (tgt, aq,
@@ -1462,7 +1495,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
- k->host_end - k->host_start, cbufp);
+ k->host_end - k->host_start,
+ false, cbufp);
tgt->list[i].has_null_ptr_assoc = false;
for (j = i + 1; j < mapnum; j++)
@@ -1525,7 +1559,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
- sizeof (void *), cbufp);
+ sizeof (void *), false, cbufp);
break;
default:
gomp_mutex_unlock (&devicep->lock);
@@ -1541,7 +1575,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
- &tgt_addr, sizeof (void *), NULL);
+ &tgt_addr, sizeof (void *), true, NULL);
}
array++;
}
@@ -1556,7 +1590,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + i * sizeof (void *)),
(void *) &cur_node.tgt_offset, sizeof (void *),
- cbufp);
+ true, cbufp);
}
}
@@ -1568,7 +1602,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
(char *) cbuf.buf + (cbuf.chunks[c].start
- cbuf.chunks[0].start),
- cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
+ cbuf.chunks[c].end - cbuf.chunks[c].start,
+ true, NULL);
free (cbuf.buf);
cbuf.buf = NULL;
cbufp = NULL;
@@ -1892,7 +1927,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
if (GOMP_MAP_COPY_TO_P (kind & typemask))
gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
- NULL);
+ false, NULL);
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
}