aboutsummaryrefslogtreecommitdiff
path: root/libgomp/oacc-async.c
diff options
context:
space:
mode:
authorChung-Lin Tang <cltang@codesourcery.com>2019-05-13 13:32:00 +0000
committerChung-Lin Tang <cltang@gcc.gnu.org>2019-05-13 13:32:00 +0000
commit1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef (patch)
tree5bc5ab9a965b79bc46065ff6f4ac986b34239caa /libgomp/oacc-async.c
parentf78f5d239203d0da6638bc5e20080971d61d09d9 (diff)
downloadgcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.zip
gcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.tar.gz
gcc-1f4c5b9bb2eb81880e2bc725435d596fcd2bdfef.tar.bz2
2019-05-13 Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Thomas Schwinge <thomas@codesourcery.com> libgomp/ * libgomp-plugin.h (struct goacc_asyncqueue): Declare. (struct goacc_asyncqueue_list): Likewise. (goacc_aq): Likewise. (goacc_aq_list): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_async_test): Remove. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust declaration. (GOMP_OFFLOAD_openacc_cuda_get_stream): Likewise. (GOMP_OFFLOAD_openacc_cuda_set_stream): Likewise. (GOMP_OFFLOAD_openacc_async_exec): Declare. (GOMP_OFFLOAD_openacc_async_construct): Declare. (GOMP_OFFLOAD_openacc_async_destruct): Declare. (GOMP_OFFLOAD_openacc_async_test): Declare. (GOMP_OFFLOAD_openacc_async_synchronize): Declare. (GOMP_OFFLOAD_openacc_async_serialize): Declare. (GOMP_OFFLOAD_openacc_async_queue_callback): Declare. (GOMP_OFFLOAD_openacc_async_host2dev): Declare. (GOMP_OFFLOAD_openacc_async_dev2host): Declare. * libgomp.h (struct acc_dispatch_t): Define 'async' sub-struct. (gomp_acc_insert_pointer): Adjust declaration. (gomp_copy_host2dev): New declaration. (gomp_copy_dev2host): Likewise. (gomp_map_vars_async): Likewise. (gomp_unmap_tgt): Likewise. (gomp_unmap_vars_async): Likewise. (gomp_fini_device): Likewise. * oacc-async.c (get_goacc_thread): New function. (get_goacc_thread_device): New function. (lookup_goacc_asyncqueue): New function. (get_goacc_asyncqueue): New function. (acc_async_test): Adjust code to use new async design. (acc_async_test_all): Likewise. (acc_wait): Likewise. (acc_wait_async): Likewise. (acc_wait_all): Likewise. (acc_wait_all_async): Likewise. (goacc_async_free): New function. (goacc_init_asyncqueues): Likewise. (goacc_fini_asyncqueues): Likewise. 
* oacc-cuda.c (acc_get_cuda_stream): Adjust code to use new async design. (acc_set_cuda_stream): Likewise. * oacc-host.c (host_openacc_exec): Adjust parameters, remove 'async'. (host_openacc_register_async_cleanup): Remove. (host_openacc_async_exec): New function. (host_openacc_async_test): Adjust parameters. (host_openacc_async_test_all): Remove. (host_openacc_async_wait): Remove. (host_openacc_async_wait_async): Remove. (host_openacc_async_wait_all): Remove. (host_openacc_async_wait_all_async): Remove. (host_openacc_async_set_async): Remove. (host_openacc_async_synchronize): New function. (host_openacc_async_serialize): New function. (host_openacc_async_host2dev): New function. (host_openacc_async_dev2host): New function. (host_openacc_async_queue_callback): New function. (host_openacc_async_construct): New function. (host_openacc_async_destruct): New function. (struct gomp_device_descr host_dispatch): Remove initialization of old interface, add initialization of new async sub-struct. * oacc-init.c (acc_shutdown_1): Adjust to use gomp_fini_device. (goacc_attach_host_thread_to_device): Remove old async code usage. * oacc-int.h (goacc_init_asyncqueues): New declaration. (goacc_fini_asyncqueues): Likewise. (goacc_async_copyout_unmap_vars): Likewise. (goacc_async_free): Likewise. (get_goacc_asyncqueue): Likewise. (lookup_goacc_asyncqueue): Likewise. * oacc-mem.c (memcpy_tofrom_device): Adjust code to use new async design. (present_create_copy): Adjust code to use new async design. (delete_copyout): Likewise. (update_dev_host): Likewise. (gomp_acc_insert_pointer): Add async parameter, adjust code to use new async design. (gomp_acc_remove_pointer): Adjust code to use new async design. * oacc-parallel.c (GOACC_parallel_keyed): Adjust code to use new async design. (GOACC_enter_exit_data): Likewise. (goacc_wait): Likewise. (GOACC_update): Likewise. * oacc-plugin.c (GOMP_PLUGIN_async_unmap_vars): Change to assert fail when called, warn as obsolete in comment.
* target.c (goacc_device_copy_async): New function. (gomp_copy_host2dev): Remove 'static', add goacc_asyncqueue parameter, add goacc_device_copy_async case. (gomp_copy_dev2host): Likewise. (gomp_map_vars_existing): Add goacc_asyncqueue parameter, adjust code. (gomp_map_pointer): Likewise. (gomp_map_fields_existing): Likewise. (gomp_map_vars_internal): New always_inline function, renamed from gomp_map_vars. (gomp_map_vars): Implement by calling gomp_map_vars_internal. (gomp_map_vars_async): Implement by calling gomp_map_vars_internal, passing goacc_asyncqueue argument. (gomp_unmap_tgt): Remove static, add attribute_hidden. (gomp_unref_tgt): New function. (gomp_unmap_vars_internal): New always_inline function, renamed from gomp_unmap_vars. (gomp_unmap_vars): Implement by calling gomp_unmap_vars_internal. (gomp_unmap_vars_async): Implement by calling gomp_unmap_vars_internal, passing goacc_asyncqueue argument. (gomp_fini_device): New function. (gomp_exit_data): Adjust gomp_copy_dev2host call. (gomp_load_plugin_for_device): Remove old interface, adjust to load new async interface. (gomp_target_fini): Adjust code to call gomp_fini_device. * plugin/plugin-nvptx.c (struct cuda_map): Remove. (struct ptx_stream): Remove. (struct nvptx_thread): Remove current_stream field. (cuda_map_create): Remove. (cuda_map_destroy): Remove. (map_init): Remove. (map_fini): Remove. (map_pop): Remove. (map_push): Remove. (struct goacc_asyncqueue): Define. (struct nvptx_callback): Define. (struct ptx_free_block): Define. (struct ptx_device): Remove null_stream, active_streams, async_streams, stream_lock, and next fields. (enum ptx_event_type): Remove. (struct ptx_event): Remove. (ptx_event_lock): Remove. (ptx_events): Remove. (init_streams_for_device): Remove. (fini_streams_for_device): Remove. (select_stream_for_async): Remove. (nvptx_init): Remove ptx_events and ptx_event_lock references. (nvptx_attach_host_thread_to_device): Remove CUDA_ERROR_NOT_PERMITTED case. 
(nvptx_open_device): Add free_blocks initialization, remove init_streams_for_device call. (nvptx_close_device): Remove fini_streams_for_device call, add free_blocks destruct code. (event_gc): Remove. (event_add): Remove. (nvptx_exec): Adjust parameters and code. (nvptx_free): Likewise. (nvptx_host2dev): Remove. (nvptx_dev2host): Remove. (nvptx_set_async): Remove. (nvptx_async_test): Remove. (nvptx_async_test_all): Remove. (nvptx_wait): Remove. (nvptx_wait_async): Remove. (nvptx_wait_all): Remove. (nvptx_wait_all_async): Remove. (nvptx_get_cuda_stream): Remove. (nvptx_set_cuda_stream): Remove. (GOMP_OFFLOAD_alloc): Adjust code. (GOMP_OFFLOAD_free): Likewise. (GOMP_OFFLOAD_openacc_register_async_cleanup): Remove. (GOMP_OFFLOAD_openacc_exec): Adjust parameters and code. (GOMP_OFFLOAD_openacc_async_test_all): Remove. (GOMP_OFFLOAD_openacc_async_wait): Remove. (GOMP_OFFLOAD_openacc_async_wait_async): Remove. (GOMP_OFFLOAD_openacc_async_wait_all): Remove. (GOMP_OFFLOAD_openacc_async_wait_all_async): Remove. (GOMP_OFFLOAD_openacc_async_set_async): Remove. (cuda_free_argmem): New function. (GOMP_OFFLOAD_openacc_async_exec): New plugin hook function. (GOMP_OFFLOAD_openacc_create_thread_data): Adjust code. (GOMP_OFFLOAD_openacc_cuda_get_stream): Adjust code. (GOMP_OFFLOAD_openacc_cuda_set_stream): Adjust code. (GOMP_OFFLOAD_openacc_async_construct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_destruct): New plugin hook function. (GOMP_OFFLOAD_openacc_async_test): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_synchronize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_serialize): New plugin hook function. (GOMP_OFFLOAD_openacc_async_queue_callback): New plugin hook function. (cuda_callback_wrapper): New function. (cuda_memcpy_sanity_check): New function. (GOMP_OFFLOAD_host2dev): Remove and re-implement. (GOMP_OFFLOAD_dev2host): Remove and re-implement. (GOMP_OFFLOAD_openacc_async_host2dev): New plugin hook function. 
(GOMP_OFFLOAD_openacc_async_dev2host): New plugin hook function. From-SVN: r271128
Diffstat (limited to 'libgomp/oacc-async.c')
-rw-r--r--libgomp/oacc-async.c250
1 files changed, 223 insertions, 27 deletions
diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c
index 915284d..51bb676 100644
--- a/libgomp/oacc-async.c
+++ b/libgomp/oacc-async.c
@@ -27,47 +27,160 @@
<http://www.gnu.org/licenses/>. */
#include <assert.h>
+#include <string.h>
#include "openacc.h"
#include "libgomp.h"
#include "oacc-int.h"
-int
-acc_async_test (int async)
+static struct goacc_thread *
+get_goacc_thread (void) /* Return the calling thread's goacc_thread, reporting "no device active" via gomp_fatal if it is missing or has no device. */
{
- if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
-
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev) /* Either no thread data at all, or thread data without a device. */
gomp_fatal ("no device active");
- return thr->dev->openacc.async_test_func (async);
+ return thr;
}
-int
-acc_async_test_all (void)
+static struct gomp_device_descr *
+get_goacc_thread_device (void) /* Same check as get_goacc_thread, but hand back the thread's device descriptor instead of the thread. */
{
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
- return thr->dev->openacc.async_test_all_func ();
+ return thr->dev;
}
-void
-acc_wait (int async)
+static int
+validate_async_val (int async) /* Map async-argument ASYNC to an asyncqueue index: -1 for acc_async_sync, 0 for acc_async_noval, 1 + ASYNC for ASYNC >= 0. */
{
if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
+ gomp_fatal ("invalid async-argument: %d", async);
+
+ if (async == acc_async_sync)
+ return -1; /* Negative result: lookup_goacc_asyncqueue turns this into "no asyncqueue" (NULL). */
+
+ if (async == acc_async_noval)
+ return 0; /* Index 0 is the slot reserved for acc_async_noval. */
+
+ if (async >= 0)
+ /* TODO: we reserve 0 for acc_async_noval before we can clarify the
+ semantics of "default_async". */
+ return 1 + async; /* Shift user ids up by one so they never collide with slot 0. */
+ else
+ __builtin_unreachable (); /* Presumably async_valid_p admits only the two special values and non-negative ids -- TODO confirm. */
+}
+
+/* Return the asyncqueue on THR's device for OpenACC async-argument ASYNC. This
+ returns NULL if no asyncqueue is to be used, or if !CREATE and none exists yet.
+ Otherwise, if CREATE, create the asyncqueue if it doesn't exist yet. */
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+ async = validate_async_val (async); /* From here on ASYNC holds the internal queue index, not the caller's value. */
+ if (async < 0)
+ return NULL; /* validate_async_val returns -1 for acc_async_sync: no asyncqueue is used. */
+
+ struct goacc_asyncqueue *ret_aq = NULL;
+ struct gomp_device_descr *dev = thr->dev;
+
+ gomp_mutex_lock (&dev->openacc.async.lock); /* Held while the queue table and the active list are read or modified below. */
+ if (!create
+ && (async >= dev->openacc.async.nasyncqueue
+ || !dev->openacc.async.asyncqueue[async]))
+ goto end; /* Lookup only, and the slot is out of range or empty: return NULL. */
+
+ if (async >= dev->openacc.async.nasyncqueue) /* Table too short: grow it to ASYNC + 1 slots. */
+ {
+ int diff = async + 1 - dev->openacc.async.nasyncqueue; /* Number of slots being added. */
+ dev->openacc.async.asyncqueue
+ = gomp_realloc (dev->openacc.async.asyncqueue,
+ sizeof (goacc_aq) * (async + 1));
+ memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+ 0, sizeof (goacc_aq) * diff); /* Clear only the newly added slots so they read as "not yet created". */
+ dev->openacc.async.nasyncqueue = async + 1;
+ }
+
+ if (!dev->openacc.async.asyncqueue[async]) /* Slot is empty: construct the queue on first use. */
+ {
+ dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func ();
+
+ if (!dev->openacc.async.asyncqueue[async])
+ {
+ gomp_mutex_unlock (&dev->openacc.async.lock); /* Release the lock before reporting the fatal error. */
+ gomp_fatal ("async %d creation failed", async); /* NOTE(review): prints the internal index, not the caller's async-argument. */
+ }
+
+ /* Link new async queue into active list. */
+ goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+ n->aq = dev->openacc.async.asyncqueue[async];
+ n->next = dev->openacc.async.active; /* Push at the head of the list. */
+ dev->openacc.async.active = n;
+ }
+
+ ret_aq = dev->openacc.async.asyncqueue[async];
+
+ end:
+ gomp_mutex_unlock (&dev->openacc.async.lock);
+ return ret_aq;
+}
+
+/* Return the asyncqueue to be used for OpenACC async-argument ASYNC on the
+ calling thread's device. This might return NULL if no asyncqueue is to be
+ used. Otherwise, create the asyncqueue if it doesn't exist yet. */
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+ struct goacc_thread *thr = get_goacc_thread (); /* Reports "no device active" via gomp_fatal if the thread has no device. */
+ return lookup_goacc_asyncqueue (thr, true, async); /* CREATE is true: a missing queue is constructed. */
+}
+
+int
+acc_async_test (int async) /* Test the asyncqueue for ASYNC without creating it; returns 1 when there is no such queue. */
+{
struct goacc_thread *thr = goacc_thread ();
if (!thr || !thr->dev)
gomp_fatal ("no device active");
- thr->dev->openacc.async_wait_func (async);
+ goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); /* CREATE is false: testing must not construct a queue. */
+ if (!aq)
+ return 1; /* No queue (acc_async_sync, or never created): report 1. */
+ else
+ return thr->dev->openacc.async.test_func (aq); /* Otherwise the result comes from the device's test hook. */
+}
+
+int
+acc_async_test_all (void) /* Return 1 if the test hook succeeds for every queue on the device's active list, else 0. */
+{
+ struct goacc_thread *thr = get_goacc_thread ();
+
+ int ret = 1; /* Stays 1 when the active list is empty. */
+ gomp_mutex_lock (&thr->dev->openacc.async.lock); /* Held for the whole walk of the active list. */
+ for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+ if (!thr->dev->openacc.async.test_func (l->aq))
+ {
+ ret = 0;
+ break; /* One failing test is enough; skip the remaining queues. */
+ }
+ gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+ return ret;
+}
+
+void
+acc_wait (int async) /* Synchronize with the asyncqueue for ASYNC, if one exists; fatal error if the synchronize hook fails. */
+{
+ struct goacc_thread *thr = get_goacc_thread ();
+
+ goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async); /* CREATE is false: a queue that was never created is not waited on. */
+ if (aq && !thr->dev->openacc.async.synchronize_func (aq)) /* NULL AQ short-circuits: nothing to do. */
+ gomp_fatal ("wait on %d failed", async);
}
/* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait. */
@@ -84,23 +197,46 @@ acc_async_wait (int async)
void
acc_wait_async (int async1, int async2) /* Order the queue for ASYNC2 after the queue for ASYNC1, or synchronize on ASYNC1's queue if ASYNC2 has none. */
{
- struct goacc_thread *thr = goacc_thread ();
+ struct goacc_thread *thr = get_goacc_thread ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1); /* The queue being waited on is only looked up, never created. */
+ /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
+ we'll always be synchronous anyway? */
+ if (!aq1)
+ return; /* No queue for ASYNC1: nothing to wait for. */
+
+ goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2); /* The waiting queue is created on demand; NULL only when validate_async_val maps ASYNC2 to -1. */
+ /* An async queue is always synchronized with itself. */
+ if (aq1 == aq2)
+ return;
- thr->dev->openacc.async_wait_async_func (async1, async2);
+ if (aq2)
+ {
+ if (!thr->dev->openacc.async.serialize_func (aq1, aq2)) /* Presumably makes AQ2 wait for AQ1 -- confirm against the plugin hook. */
+ gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
+ }
+ else
+ {
+ /* TODO: Local thread synchronization.
+ Necessary for the "async2 == acc_async_sync" case, or can just skip? */
+ if (!thr->dev->openacc.async.synchronize_func (aq1)) /* No waiting queue: synchronize on AQ1 directly. */
+ gomp_fatal ("wait on %d failed", async1);
+ }
}
void
acc_wait_all (void) /* Synchronize with every queue on the device's active list; fatal error if any synchronize hook fails. */
{
- struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *dev = get_goacc_thread_device ();
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ bool ret = true;
+ gomp_mutex_lock (&dev->openacc.async.lock); /* Held across all synchronize calls. */
+ for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
+ ret &= dev->openacc.async.synchronize_func (l->aq); /* Accumulate failures; every queue is still visited. */
+ gomp_mutex_unlock (&dev->openacc.async.lock);
- thr->dev->openacc.async_wait_all_func ();
+ if (!ret)
+ gomp_fatal ("wait all failed"); /* Reported only after the lock has been released. */
}
/* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all. */
@@ -117,13 +253,73 @@ acc_async_wait_all (void)
void
acc_wait_all_async (int async) /* Make the queue for ASYNC wait on every active queue, or synchronize on each of them if ASYNC has no queue. */
{
- if (!async_valid_p (async))
- gomp_fatal ("invalid async argument: %d", async);
+ struct goacc_thread *thr = get_goacc_thread ();
- struct goacc_thread *thr = goacc_thread ();
+ goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async); /* Created on demand; NULL only when validate_async_val maps ASYNC to -1. */
- if (!thr || !thr->dev)
- gomp_fatal ("no device active");
+ bool ret = true;
+ gomp_mutex_lock (&thr->dev->openacc.async.lock); /* Taken after the lookup above, which locks and unlocks the same mutex itself. */
+ for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+ {
+ if (waiting_queue)
+ ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue); /* NOTE(review): WAITING_QUEUE is itself on the active list, so it is also passed as the first argument once. */
+ else
+ /* TODO: Local thread synchronization.
+ Necessary for the "async == acc_async_sync" case, or can just skip? */
+ ret &= thr->dev->openacc.async.synchronize_func (l->aq);
+ }
+ gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+
+ if (!ret)
+ gomp_fatal ("wait all async(%d) failed", async);
+}
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+ struct goacc_asyncqueue *aq, void *ptr) /* Free PTR now if AQ is NULL; otherwise queue free (PTR) as a callback on AQ. */
+{
+ if (!aq)
+ free (ptr); /* No queue: release immediately. */
+ else
+ devicep->openacc.async.queue_callback_func (aq, free, ptr); /* Hand free and PTR to DEVICEP's queue-callback hook; presumably run after earlier work on AQ -- confirm against the plugin. */
+}
+
+/* This function initializes the asyncqueues for the device specified by
+ DEVICEP. TODO DEVICEP must be locked on entry, and remains locked on
+ return. */
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+ devicep->openacc.async.nasyncqueue = 0; /* Empty table: lookup_goacc_asyncqueue grows it on demand. */
+ devicep->openacc.async.asyncqueue = NULL;
+ devicep->openacc.async.active = NULL; /* Empty active list. */
+ gomp_mutex_init (&devicep->openacc.async.lock); /* Lock used by the lookup, test and wait functions in this file. */
+}
- thr->dev->openacc.async_wait_all_async_func (async);
+/* This function finalizes the asyncqueues for the device specified by DEVICEP.
+ TODO DEVICEP must be locked on entry, and remains locked on return. */
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep) /* Returns true only if every destruct hook call succeeded. */
+{
+ bool ret = true;
+ gomp_mutex_lock (&devicep->openacc.async.lock);
+ if (devicep->openacc.async.nasyncqueue > 0) /* Nothing to release if the table was never grown. */
+ {
+ goacc_aq_list next;
+ for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+ {
+ ret &= devicep->openacc.async.destruct_func (l->aq); /* Accumulate failures but keep destroying the remaining queues. */
+ next = l->next; /* Save the successor before freeing this node. */
+ free (l);
+ }
+ free (devicep->openacc.async.asyncqueue);
+ devicep->openacc.async.nasyncqueue = 0; /* Reset to the same empty state goacc_init_asyncqueues sets up. */
+ devicep->openacc.async.asyncqueue = NULL;
+ devicep->openacc.async.active = NULL;
+ }
+ gomp_mutex_unlock (&devicep->openacc.async.lock);
+ gomp_mutex_destroy (&devicep->openacc.async.lock); /* Tear down the lock that goacc_init_asyncqueues initialized. */
+ return ret;
}