aboutsummaryrefslogtreecommitdiff
path: root/libgomp/target.c
diff options
context:
space:
mode:
author: Chung-Lin Tang <cltang@codesourcery.com> 2019-05-13 13:32:00 +0000
committer: Chung-Lin Tang <cltang@gcc.gnu.org> 2019-05-13 13:32:00 +0000
commit: 1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef (patch)
tree: 5bc5ab9a965b79bc46065ff6f4ac986b34239caa /libgomp/target.c
parent: f78f5d239203d0da6638bc5e20080971d61d09d9 (diff)
download: gcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.zip
gcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.tar.gz
gcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.tar.bz2
2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Thomas Schwinge <thomas@codesourcery.com> libgomp/ * libgomp-plugin.h (struct goacc_asyncqueue): Declare. (struct goacc_asyncqueue_list): Likewise. (goacc_aq): Likewise. (goacc_aq_list): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_async_test): Remove. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust declaration. (GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise. (GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise. (GOMP_OFFLOAD_openacc_async_exec): Declare. (GOMP_OFFLOAD_openacc_async_construct): Declare. (GOMP_OFFLOAD_openacc_async_destruct): Declare. (GOMP_OFFLOAD_openacc_async_test): Declare. (GOMP_OFFLOAD_openacc_async_synchronize): Declare. (GOMP_OFFLOAD_openacc_async_serialize): Declare. (GOMP_OFFLOAD_openacc_async_queue_callback): Declare. (GOMP_OFFLOAD_openacc_async_host2dev): Declare. (GOMP_OFFLOAD_openacc_async_dev2host): Declare. * libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct. (gomp_acc_insert_pointer): Adjust declaration. (gomp_copy_host2dev): New declaration. (gomp_copy_dev2host): Likewise. (gomp_map_vars_async): Likewise. (gomp_unmap_tgt): Likewise. (gomp_unmap_vars_async): Likewise. (gomp_fini_device): Likewise. * oacc-async.c (get_goacc_thread): New function. (get_goacc_thread_device): New function. (lookup_goacc_asyncqueue): New function. (get_goacc_asyncqueue): New function. (acc_async_test): Adjust code to use new async design. (acc_async_test_all): Likewise. (acc_wait): Likewise. (acc_wait_async): Likewise. (acc_wait_all): Likewise. (acc_wait_all_async): Likewise. (goacc_async_free): New function. (goacc_init_asyncqueues): Likewise. (goacc_fini_asyncqueues): Likewise. 
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async design. (acc_set_cuda_stream): Likewise. * oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'. (host_openacc_register_async_cleanup): Remove. (host_openacc_async_exec): New function. (host_openacc_async_test): Adjust parameters. (host_openacc_async_test_all): Remove. (host_openacc_async_wait): Remove. (host_openacc_async_wait_async): Remove. (host_openacc_async_wait_all): Remove. (host_openacc_async_wait_all_async): Remove. (host_openacc_async_set_async): Remove. (host_openacc_async_synchronize): New function. (host_openacc_async_serialize): New function. (host_openacc_async_host2dev): New function. (host_openacc_async_dev2host): New function. (host_openacc_async_queue_callback): New function. (host_openacc_async_construct): New function. (host_openacc_async_destruct): New function. (struct gomp_device_descr host_dispatch): Remove initialization of old interface, add initialization of new async sub-struct. * oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device. (goacc_attach_host_thread_to_device): Remove old async code usage. * oacc-int.h (goacc_init_asyncqueues): New declaration. (goacc_fini_asyncqueues): Likewise. (goacc_async_copyout_unmap_vars): Likewise. (goacc_async_free): Likewise. (get_goacc_asyncqueue): Likewise. (lookup_goacc_asyncqueue): Likewise. * oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async design. (present_create_copy): Adjust code to use new async design. (delete_copyout): Likewise. (update_dev_host): Likewise. (gomp_acc_insert_pointer): Add async parameter, adjust code to use new async design. (gomp_acc_remove_pointer): Adjust code to use new async design. * oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async design. (GOACC_enter_exit_data): Likewise. (goacc_wait): Likewise. (GOACC_update): Likewise. * oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail when called, warn as obsolete in comment.
* target.c (goacc_device_copy_async): New function. (gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter, add goacc_device_copy_async case. (gomp_copy_dev2host): Likewise. (gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code. (gomp_map_pointer): Likewise. (gomp_map_fields_existing): Likewise. (gomp_map_vars_internal): New always_inline function, renamed from gomp_map_vars. (gomp_map_vars): Implement by calling gomp_map_vars_internal. (gomp_map_vars_async): Implement by calling gomp_map_vars_internal, passing goacc_asyncqueue argument. (gomp_unmap_tgt): Remove static, add attribute_hidden. (gomp_unref_tgt): New function. (gomp_unmap_vars_internal): New always_inline function, renamed from gomp_unmap_vars. (gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal. (gomp_unmap_vars_async): Implement by calling gomp_unmap_vars_internal, passing goacc_asyncqueue argument. (gomp_fini_device): New function. (gomp_exit_data): Adjust gomp_copy_dev2host call. (gomp_load_plugin_for_device): Remove old interface, adjust to load new async interface. (gomp_target_fini): Adjust code to call gomp_fini_device. * plugin/plugin-nvptx.c (struct cuda_map): Remove. (struct ptx_stream): Remove. (struct nvptx_thread): Remove current_stream field. (cuda_map_create): Remove. (cuda_map_destroy): Remove. (map_init): Remove. (map_fini): Remove. (map_pop): Remove. (map_push): Remove. (struct goacc_asyncqueue): Define. (struct nvptx_callback): Define. (struct ptx_free_block): Define. (struct ptx_device): Remove null_stream, active_streams, async_streams, stream_lock, and next fields. (enum ptx_event_type): Remove. (struct ptx_event): Remove. (ptx_event_lock): Remove. (ptx_events): Remove. (init_streams_for_device): Remove. (fini_streams_for_device): Remove. (select_stream_for_async): Remove. (nvptx_init): Remove ptx_events and ptx_event_lock references. (nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED case. 
(nvptx_open_device): Add free_blocks initialization, remove init_streams_for_device call. (nvptx_close_device): Remove fini_streams_for_device call, add free_blocks destruct code. (event_gc): Remove. (event_add): Remove. (nvptx_exec): Adjust parameters and code. (nvptx_free): Likewise. (nvptx_host2dev): Remove. (nvptx_dev2host): Remove. (nvptx_set_async): Remove. (nvptx_async_test): Remove. (nvptx_async_test_all): Remove. (nvptx_wait): Remove. (nvptx_wait_async): Remove. (nvptx_wait_all): Remove. (nvptx_wait_all_async): Remove. (nvptx_get_cuda_stream): Remove. (nvptx_set_cuda_stream): Remove. (GOMP_OFFLOAD_alloc): Adjust code. (GOMP_OFFLOAD_free): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust parameters and code. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (cuda_free_argmem): New function. (GOMP_OFFLOAD_openacc_async_exec): New plugin hook function. (GOMP_OFFLOAD_openacc_create_thread_data): Adjust code. (GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code. (GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code. (GOMP_OFFLOAD_openacc_async_construct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_test): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function. (cuda_callback_wrapper): New function. (cuda_memcpy_sanity_check): New function. (GOMP_OFFLOAD_host2dev): Remove and re-implement. (GOMP_OFFLOAD_dev2host): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function. 
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function.

From-SVN: r271128
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- libgomp/target.c | 206
1 files changed, 148 insertions, 58 deletions
diff --git a/libgomp/target.c b/libgomp/target.c
index 3114800..2e0905e 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -177,6 +177,22 @@ gomp_device_copy (struct gomp_device_descr *devicep,
}
}
+static inline void
+goacc_device_copy_async (struct gomp_device_descr *devicep,
+ bool (*copy_func) (int, void *, const void *, size_t,
+ struct goacc_asyncqueue *),
+ const char *dst, void *dstaddr,
+ const char *src, const void *srcaddr,
+ size_t size, struct goacc_asyncqueue *aq)
+{
+ if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
+ {
+ gomp_mutex_unlock (&devicep->lock);
+ gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
+ src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
+ }
+}
+
/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
host to device memory transfers. */
@@ -269,8 +285,9 @@ gomp_to_device_kind_p (int kind)
}
}
-static void
+attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq,
void *d, const void *h, size_t sz,
struct gomp_coalesce_buf *cbuf)
{
@@ -299,14 +316,23 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
}
}
}
- gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
+ if (__builtin_expect (aq != NULL, 0))
+ goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
+ "dev", d, "host", h, sz, aq);
+ else
+ gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}
-static void
+attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq,
void *h, const void *d, size_t sz)
{
- gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
+ if (__builtin_expect (aq != NULL, 0))
+ goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
+ "host", h, "dev", d, sz, aq);
+ else
+ gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}
static void
@@ -324,7 +350,8 @@ gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
Helper function of gomp_map_vars. */
static inline void
-gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
+gomp_map_vars_existing (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, splay_tree_key oldn,
splay_tree_key newn, struct target_var_desc *tgt_var,
unsigned char kind, struct gomp_coalesce_buf *cbuf)
{
@@ -346,7 +373,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
}
if (GOMP_MAP_ALWAYS_TO_P (kind))
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (oldn->tgt->tgt_start + oldn->tgt_offset
+ newn->host_start - oldn->host_start),
(void *) newn->host_start,
@@ -364,8 +391,8 @@ get_kind (bool short_mapkind, void *kinds, int idx)
}
static void
-gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
- uintptr_t target_offset, uintptr_t bias,
+gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
+ uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
struct gomp_coalesce_buf *cbuf)
{
struct gomp_device_descr *devicep = tgt->device_descr;
@@ -376,7 +403,7 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
if (cur_node.host_start == (uintptr_t) NULL)
{
cur_node.tgt_offset = (uintptr_t) NULL;
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset,
sizeof (void *), cbuf);
@@ -398,12 +425,13 @@ gomp_map_pointer (struct target_mem_desc *tgt, uintptr_t host_ptr,
array section. Now subtract bias to get what we want
to initialize the pointer with. */
cur_node.tgt_offset -= bias;
- gomp_copy_host2dev (devicep, (void *) (tgt->tgt_start + target_offset),
+ gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
(void *) &cur_node.tgt_offset, sizeof (void *), cbuf);
}
static void
-gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
+gomp_map_fields_existing (struct target_mem_desc *tgt,
+ struct goacc_asyncqueue *aq, splay_tree_key n,
size_t first, size_t i, void **hostaddrs,
size_t *sizes, void *kinds,
struct gomp_coalesce_buf *cbuf)
@@ -423,7 +451,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
- gomp_map_vars_existing (devicep, n2, &cur_node,
+ gomp_map_vars_existing (devicep, aq, n2, &cur_node,
&tgt->list[i], kind & typemask, cbuf);
return;
}
@@ -439,8 +467,8 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->host_start - n->host_start
== n2->tgt_offset - n->tgt_offset)
{
- gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
- kind & typemask, cbuf);
+ gomp_map_vars_existing (devicep, aq, n2, &cur_node,
+ &tgt->list[i], kind & typemask, cbuf);
return;
}
}
@@ -451,7 +479,7 @@ gomp_map_fields_existing (struct target_mem_desc *tgt, splay_tree_key n,
&& n2->tgt == n->tgt
&& n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
{
- gomp_map_vars_existing (devicep, n2, &cur_node, &tgt->list[i],
+ gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
kind & typemask, cbuf);
return;
}
@@ -483,10 +511,12 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
return tgt->tgt_start + tgt->list[i].offset;
}
-attribute_hidden struct target_mem_desc *
-gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
- void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
- bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+static inline __attribute__((always_inline)) struct target_mem_desc *
+gomp_map_vars_internal (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, size_t mapnum,
+ void **hostaddrs, void **devaddrs, size_t *sizes,
+ void *kinds, bool short_mapkind,
+ enum gomp_map_vars_kind pragma_kind)
{
size_t i, tgt_align, tgt_size, not_found_cnt = 0;
bool has_firstprivate = false;
@@ -600,7 +630,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
continue;
}
for (i = first; i <= last; i++)
- gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, NULL);
i--;
continue;
@@ -645,7 +675,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
else
n = splay_tree_lookup (mem_map, &cur_node);
if (n && n->refcount != REFCOUNT_LINK)
- gomp_map_vars_existing (devicep, n, &cur_node, &tgt->list[i],
+ gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
kind & typemask, NULL);
else
{
@@ -756,7 +786,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt_size = (tgt_size + align - 1) & ~(align - 1);
tgt->list[i].offset = tgt_size;
len = sizes[i];
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + tgt_size),
(void *) hostaddrs[i], len, cbufp);
tgt_size += len;
@@ -790,7 +820,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
continue;
}
for (i = first; i <= last; i++)
- gomp_map_fields_existing (tgt, n, first, i, hostaddrs,
+ gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
sizes, kinds, cbufp);
i--;
continue;
@@ -810,7 +840,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
if (cur_node.tgt_offset)
cur_node.tgt_offset -= sizes[i];
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (n->tgt->tgt_start
+ n->tgt_offset
+ cur_node.host_start
@@ -831,7 +861,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
k->host_end = k->host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, k);
if (n && n->refcount != REFCOUNT_LINK)
- gomp_map_vars_existing (devicep, n, k, &tgt->list[i],
+ gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
kind & typemask, cbufp);
else
{
@@ -884,18 +914,19 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
case GOMP_MAP_FORCE_TOFROM:
case GOMP_MAP_ALWAYS_TO:
case GOMP_MAP_ALWAYS_TOFROM:
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
k->host_end - k->host_start, cbufp);
break;
case GOMP_MAP_POINTER:
- gomp_map_pointer (tgt, (uintptr_t) *(void **) k->host_start,
+ gomp_map_pointer (tgt, aq,
+ (uintptr_t) *(void **) k->host_start,
k->tgt_offset, sizes[i], cbufp);
break;
case GOMP_MAP_TO_PSET:
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
@@ -917,7 +948,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
tgt->list[j].always_copy_from = false;
if (k->refcount != REFCOUNT_INFINITY)
k->refcount++;
- gomp_map_pointer (tgt,
+ gomp_map_pointer (tgt, aq,
(uintptr_t) *(void **) hostaddrs[j],
k->tgt_offset
+ ((uintptr_t) hostaddrs[j]
@@ -946,7 +977,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
break;
case GOMP_MAP_FORCE_DEVICEPTR:
assert (k->host_end - k->host_start == sizeof (void *));
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start
+ k->tgt_offset),
(void *) k->host_start,
@@ -965,7 +996,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
/* We intentionally do not use coalescing here, as it's not
data allocated by the current call to this function. */
- gomp_copy_host2dev (devicep, (void *) n->tgt_offset,
+ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
&tgt_addr, sizeof (void *), NULL);
}
array++;
@@ -978,7 +1009,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
for (i = 0; i < mapnum; i++)
{
cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + i * sizeof (void *)),
(void *) &cur_node.tgt_offset, sizeof (void *),
cbufp);
@@ -989,7 +1020,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
{
long c = 0;
for (c = 0; c < cbuf.chunk_cnt; ++c)
- gomp_copy_host2dev (devicep,
+ gomp_copy_host2dev (devicep, aq,
(void *) (tgt->tgt_start + cbuf.chunks[c].start),
(char *) cbuf.buf + (cbuf.chunks[c].start
- cbuf.chunks[0].start),
@@ -1012,7 +1043,27 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
return tgt;
}
-static void
+attribute_hidden struct target_mem_desc *
+gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
+ void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+ bool short_mapkind, enum gomp_map_vars_kind pragma_kind)
+{
+ return gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
+ sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden struct target_mem_desc *
+gomp_map_vars_async (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, size_t mapnum,
+ void **hostaddrs, void **devaddrs, size_t *sizes,
+ void *kinds, bool short_mapkind,
+ enum gomp_map_vars_kind pragma_kind)
+{
+ return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
+ sizes, kinds, short_mapkind, pragma_kind);
+}
+
+attribute_hidden void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
/* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. */
@@ -1040,12 +1091,24 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
return is_tgt_unmapped;
}
+static void
+gomp_unref_tgt (void *ptr)
+{
+ struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
+
+ if (tgt->refcount > 1)
+ tgt->refcount--;
+ else
+ gomp_unmap_tgt (tgt);
+}
+
/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant
variables back from device to host: if it is false, it is assumed that this
has been done already. */
-attribute_hidden void
-gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+static inline __attribute__((always_inline)) void
+gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
+ struct goacc_asyncqueue *aq)
{
struct gomp_device_descr *devicep = tgt->device_descr;
@@ -1082,7 +1145,7 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
if ((do_unmap && do_copyfrom && tgt->list[i].copy_from)
|| tgt->list[i].always_copy_from)
- gomp_copy_dev2host (devicep,
+ gomp_copy_dev2host (devicep, aq,
(void *) (k->host_start + tgt->list[i].offset),
(void *) (k->tgt->tgt_start + k->tgt_offset
+ tgt->list[i].offset),
@@ -1091,14 +1154,28 @@ gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
gomp_remove_var (devicep, k);
}
- if (tgt->refcount > 1)
- tgt->refcount--;
+ if (aq)
+ devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt,
+ (void *) tgt);
else
- gomp_unmap_tgt (tgt);
+ gomp_unref_tgt ((void *) tgt);
gomp_mutex_unlock (&devicep->lock);
}
+attribute_hidden void
+gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
+{
+ gomp_unmap_vars_internal (tgt, do_copyfrom, NULL);
+}
+
+attribute_hidden void
+gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom,
+ struct goacc_asyncqueue *aq)
+{
+ gomp_unmap_vars_internal (tgt, do_copyfrom, aq);
+}
+
static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
size_t *sizes, void *kinds, bool short_mapkind)
@@ -1148,9 +1225,10 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
size_t size = cur_node.host_end - cur_node.host_start;
if (GOMP_MAP_COPY_TO_P (kind & typemask))
- gomp_copy_host2dev (devicep, devaddr, hostaddr, size, NULL);
+ gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
+ NULL);
if (GOMP_MAP_COPY_FROM_P (kind & typemask))
- gomp_copy_dev2host (devicep, hostaddr, devaddr, size);
+ gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
}
}
gomp_mutex_unlock (&devicep->lock);
@@ -1443,9 +1521,24 @@ gomp_init_device (struct gomp_device_descr *devicep)
false);
}
+ /* Initialize OpenACC asynchronous queues. */
+ goacc_init_asyncqueues (devicep);
+
devicep->state = GOMP_DEVICE_INITIALIZED;
}
+/* This function finalizes the target device, specified by DEVICEP. DEVICEP
+ must be locked on entry, and remains locked on return. */
+
+attribute_hidden bool
+gomp_fini_device (struct gomp_device_descr *devicep)
+{
+ bool ret = goacc_fini_asyncqueues (devicep);
+ ret &= devicep->fini_device_func (devicep->target_id);
+ devicep->state = GOMP_DEVICE_FINALIZED;
+ return ret;
+}
+
attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
@@ -1954,7 +2047,7 @@ gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
if ((kind == GOMP_MAP_FROM && k->refcount == 0)
|| kind == GOMP_MAP_ALWAYS_FROM)
- gomp_copy_dev2host (devicep, (void *) cur_node.host_start,
+ gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
(void *) (k->tgt->tgt_start + k->tgt_offset
+ cur_node.host_start
- k->host_start),
@@ -2636,20 +2729,20 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
{
if (!DLSYM_OPT (openacc.exec, openacc_exec)
- || !DLSYM_OPT (openacc.register_async_cleanup,
- openacc_register_async_cleanup)
- || !DLSYM_OPT (openacc.async_test, openacc_async_test)
- || !DLSYM_OPT (openacc.async_test_all, openacc_async_test_all)
- || !DLSYM_OPT (openacc.async_wait, openacc_async_wait)
- || !DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async)
- || !DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all)
- || !DLSYM_OPT (openacc.async_wait_all_async,
- openacc_async_wait_all_async)
- || !DLSYM_OPT (openacc.async_set_async, openacc_async_set_async)
|| !DLSYM_OPT (openacc.create_thread_data,
openacc_create_thread_data)
|| !DLSYM_OPT (openacc.destroy_thread_data,
- openacc_destroy_thread_data))
+ openacc_destroy_thread_data)
+ || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
+ || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
+ || !DLSYM_OPT (openacc.async.test, openacc_async_test)
+ || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
+ || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
+ || !DLSYM_OPT (openacc.async.queue_callback,
+ openacc_async_queue_callback)
+ || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
+ || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
+ || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev))
{
/* Require all the OpenACC handlers if we have
GOMP_OFFLOAD_CAP_OPENACC_200. */
@@ -2700,10 +2793,7 @@ gomp_target_fini (void)
struct gomp_device_descr *devicep = &devices[i];
gomp_mutex_lock (&devicep->lock);
if (devicep->state == GOMP_DEVICE_INITIALIZED)
- {
- ret = devicep->fini_device_func (devicep->target_id);
- devicep->state = GOMP_DEVICE_FINALIZED;
- }
+ ret = gomp_fini_device (devicep);
gomp_mutex_unlock (&devicep->lock);
if (!ret)
gomp_fatal ("device finalization failed");