about summary refs log tree commit diff
path: root/libgomp/oacc-parallel.c
diff options
context:
space:
mode:
author Thomas Schwinge <thomas@codesourcery.com> 2019-05-17 21:13:36 +0200
committer Thomas Schwinge <tschwinge@gcc.gnu.org> 2019-05-17 21:13:36 +0200
commit 5fae049dc272144f8e61af94ee0ba42b270915e5 (patch)
tree 6108e18ecb0a9f9190bb4d03730bba8cd80654f1 /libgomp/oacc-parallel.c
parent b48f44bf77a39fefc238a16cf1225c6464c82406 (diff)
download gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.zip
gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.tar.gz
gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.tar.bz2
OpenACC Profiling Interface (incomplete)
	libgomp/
	* acc_prof.h: New file.
	* oacc-profiling.c: Likewise.
	* Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES):
	Add these, respectively.
	* Makefile.in: Regenerate.
	* env.c (initialize_env): Call goacc_profiling_initialize.
	* oacc-plugin.c (GOMP_PLUGIN_goacc_thread)
	(GOMP_PLUGIN_goacc_profiling_dispatch): New functions.
	* oacc-plugin.h (GOMP_PLUGIN_goacc_thread)
	(GOMP_PLUGIN_goacc_profiling_dispatch): Declare.
	* libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register,
	acc_prof_unregister, and acc_register_library.
	(GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and
	GOMP_PLUGIN_goacc_thread.
	* oacc-int.h (struct goacc_thread): Add prof_info, api_info,
	prof_callbacks_enabled members.
	(goacc_prof_enabled, goacc_profiling_initialize)
	(_goacc_profiling_dispatch_p, _goacc_profiling_setup_p)
	(goacc_profiling_dispatch): Declare.
	(GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P)
	(GOACC_PROFILING_SETUP_P): Define.
	* oacc-async.c (acc_async_test, acc_async_test_all, acc_wait)
	(acc_wait_async, acc_wait_all, acc_wait_all_async): Update for
	OpenACC Profiling Interface.
	* oacc-cuda.c (acc_get_current_cuda_device)
	(acc_get_current_cuda_context, acc_get_cuda_stream)
	(acc_set_cuda_stream): Likewise.
	* oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device)
	(acc_init, acc_set_device_type, acc_get_device_type)
	(acc_get_device_num, goacc_lazy_initialize): Likewise.
	* oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device)
	(acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data)
	(acc_unmap_data, present_create_copy, delete_copyout)
	(update_dev_host): Likewise.
	* oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start)
	(GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait):
	Likewise.
	* plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free)
	(GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
	Likewise.
	* libgomp.texi: Update.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New
	file.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
	Likewise.
	* testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c:
	Likewise.

From-SVN: r271346
Diffstat (limited to 'libgomp/oacc-parallel.c')
-rw-r--r-- libgomp/oacc-parallel.c 460
1 files changed, 436 insertions, 24 deletions
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index fa99a2a..e56330f 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -152,21 +152,75 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
thr = goacc_thread ();
acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_compute_construct_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info compute_construct_event_info;
+ if (profiling_p)
+ {
+ compute_construct_event_info.other_event.event_type
+ = prof_info.event_type;
+ compute_construct_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ compute_construct_event_info.other_event.parent_construct
+ = acc_construct_parallel;
+ compute_construct_event_info.other_event.implicit = 0;
+ compute_construct_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+ &api_info);
+
handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
/* Host fallback if "if" clause is false or if the current device is set to
the host. */
if (flags & GOACC_FLAG_HOST_FALLBACK)
{
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
goacc_save_and_set_bind (acc_device_host);
fn (hostaddrs);
goacc_restore_bind ();
- return;
+ goto out_prof;
}
else if (acc_device_type (acc_dev->type) == acc_device_host)
{
fn (hostaddrs);
- return;
+ goto out_prof;
}
/* Default: let the runtime choose. */
@@ -200,6 +254,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
if (async == GOMP_LAUNCH_OP_MAX)
async = va_arg (ap, unsigned);
+
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
break;
}
@@ -233,10 +294,34 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
else
tgt_fn = (void (*)) fn;
+ acc_event_info enter_exit_data_event_info;
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_start;
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_exit_data_event_info.other_event.parent_construct
+ = compute_construct_event_info.other_event.parent_construct;
+ enter_exit_data_event_info.other_event.implicit = 1;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
+
goacc_aq aq = get_goacc_asyncqueue (async);
tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
true, GOMP_MAP_VARS_OPENACC);
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_end;
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
devaddrs = gomp_alloca (sizeof (void *) * mapnum);
for (i = 0; i < mapnum; i++)
@@ -244,17 +329,46 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
+ tgt->list[i].key->tgt_offset
+ tgt->list[i].offset);
if (aq == NULL)
+ acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
+ tgt);
+ else
+ acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+ dims, tgt, aq);
+
+ if (profiling_p)
{
- acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
- dims, tgt);
- /* If running synchronously, unmap immediately. */
- gomp_unmap_vars (tgt, true);
+ prof_info.event_type = acc_ev_exit_data_start;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
}
+
+ /* If running synchronously, unmap immediately. */
+ if (aq == NULL)
+ gomp_unmap_vars (tgt, true);
else
+ gomp_unmap_vars_async (tgt, true, aq);
+
+ if (profiling_p)
{
- acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
- dims, tgt, aq);
- gomp_unmap_vars_async (tgt, true, aq);
+ prof_info.event_type = acc_ev_exit_data_end;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+ }
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_compute_construct_end;
+ compute_construct_event_info.other_event.event_type
+ = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
+ &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
}
}
@@ -293,16 +407,83 @@ GOACC_data_start (int flags_m, size_t mapnum,
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_enter_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = acc_async_sync; /* Always synchronous. */
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info enter_data_event_info;
+ if (profiling_p)
+ {
+ enter_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_data_event_info.other_event.parent_construct = acc_construct_data;
+ for (int i = 0; i < mapnum; ++i)
+ if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR)
+ {
+ /* If there is one such data mapping kind, then this is actually an
+ OpenACC 'host_data' construct. (GCC maps the OpenACC
+ 'host_data' construct to the OpenACC 'data' construct.) Apart
+ from artificial test cases (such as an OpenACC 'host_data'
+ construct's (implicit) device initialization when there hasn't
+ been any device data be set up before...), there can't really
+ any meaningful events be generated from OpenACC 'host_data'
+ constructs, though. */
+ enter_data_event_info.other_event.parent_construct
+ = acc_construct_host_data;
+ break;
+ }
+ enter_data_event_info.other_event.implicit = 0;
+ enter_data_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
/* Host fallback or 'do nothing'. */
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|| (flags & GOACC_FLAG_HOST_FALLBACK))
{
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
GOMP_MAP_VARS_OPENACC);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
- return;
+ goto out_prof;
}
gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__);
@@ -311,18 +492,90 @@ GOACC_data_start (int flags_m, size_t mapnum,
gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__);
tgt->prev = thr->mapped_data;
thr->mapped_data = tgt;
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_enter_data_end;
+ enter_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
GOACC_data_end (void)
{
struct goacc_thread *thr = goacc_thread ();
+ struct gomp_device_descr *acc_dev = thr->dev;
struct target_mem_desc *tgt = thr->mapped_data;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_exit_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = acc_async_sync; /* Always synchronous. */
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info exit_data_event_info;
+ if (profiling_p)
+ {
+ exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ exit_data_event_info.other_event.parent_construct = acc_construct_data;
+ exit_data_event_info.other_event.implicit = 0;
+ exit_data_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
+
gomp_debug (0, " %s: restore mappings\n", __FUNCTION__);
thr->mapped_data = tgt->prev;
gomp_unmap_vars (tgt, true);
gomp_debug (0, " %s: mappings restored\n", __FUNCTION__);
+
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_exit_data_end;
+ exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
@@ -342,19 +595,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
thr = goacc_thread ();
acc_dev = thr->dev;
- if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- || (flags & GOACC_FLAG_HOST_FALLBACK))
- return;
-
- if (num_waits)
- {
- va_list ap;
-
- va_start (ap, num_waits);
- goacc_wait (async, num_waits, &ap);
- va_end (ap);
- }
-
/* Determine whether "finalize" semantics apply to all mappings of this
OpenACC directive. */
bool finalize = false;
@@ -394,6 +634,77 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
kind);
}
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type
+ = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info enter_exit_data_event_info;
+ if (profiling_p)
+ {
+ enter_exit_data_event_info.other_event.event_type
+ = prof_info.event_type;
+ enter_exit_data_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ enter_exit_data_event_info.other_event.parent_construct
+ = data_enter ? acc_construct_enter_data : acc_construct_exit_data;
+ enter_exit_data_event_info.other_event.implicit = 0;
+ enter_exit_data_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+
+ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+ || (flags & GOACC_FLAG_HOST_FALLBACK))
+ {
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+
+ goto out_prof;
+ }
+
+ if (num_waits)
+ {
+ va_list ap;
+
+ va_start (ap, num_waits);
+ goacc_wait (async, num_waits, &ap);
+ va_end (ap);
+ }
+
/* In c, non-pointers and arrays are represented by a single data clause.
Dynamically allocated arrays and subarrays are represented by a data
clause followed by an internal GOMP_MAP_POINTER.
@@ -486,6 +797,19 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum,
i += pointer - 1;
}
}
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type
+ = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end;
+ enter_exit_data_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info,
+ &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
static void
@@ -534,9 +858,64 @@ GOACC_update (int flags_m, size_t mapnum,
struct goacc_thread *thr = goacc_thread ();
struct gomp_device_descr *acc_dev = thr->dev;
+ bool profiling_p = GOACC_PROFILING_DISPATCH_P (true);
+
+ acc_prof_info prof_info;
+ if (profiling_p)
+ {
+ thr->prof_info = &prof_info;
+
+ prof_info.event_type = acc_ev_update_start;
+ prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
+ prof_info.version = _ACC_PROF_INFO_VERSION;
+ prof_info.device_type = acc_device_type (acc_dev->type);
+ prof_info.device_number = acc_dev->target_id;
+ prof_info.thread_id = -1;
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ prof_info.src_file = NULL;
+ prof_info.func_name = NULL;
+ prof_info.line_no = -1;
+ prof_info.end_line_no = -1;
+ prof_info.func_line_no = -1;
+ prof_info.func_end_line_no = -1;
+ }
+ acc_event_info update_event_info;
+ if (profiling_p)
+ {
+ update_event_info.other_event.event_type
+ = prof_info.event_type;
+ update_event_info.other_event.valid_bytes
+ = _ACC_OTHER_EVENT_INFO_VALID_BYTES;
+ update_event_info.other_event.parent_construct = acc_construct_update;
+ update_event_info.other_event.implicit = 0;
+ update_event_info.other_event.tool_info = NULL;
+ }
+ acc_api_info api_info;
+ if (profiling_p)
+ {
+ thr->api_info = &api_info;
+
+ api_info.device_api = acc_device_api_none;
+ api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES;
+ api_info.device_type = prof_info.device_type;
+ api_info.vendor = -1;
+ api_info.device_handle = NULL;
+ api_info.context_handle = NULL;
+ api_info.async_handle = NULL;
+ }
+
+ if (profiling_p)
+ goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
|| (flags & GOACC_FLAG_HOST_FALLBACK))
- return;
+ {
+ prof_info.device_type = acc_device_host;
+ api_info.device_type = prof_info.device_type;
+
+ goto out_prof;
+ }
if (num_waits)
{
@@ -608,11 +987,38 @@ GOACC_update (int flags_m, size_t mapnum,
break;
}
}
+
+ out_prof:
+ if (profiling_p)
+ {
+ prof_info.event_type = acc_ev_update_end;
+ update_event_info.other_event.event_type = prof_info.event_type;
+ goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info);
+
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
void
GOACC_wait (int async, int num_waits, ...)
{
+ goacc_lazy_initialize ();
+
+ struct goacc_thread *thr = goacc_thread ();
+
+ /* No nesting. */
+ assert (thr->prof_info == NULL);
+ assert (thr->api_info == NULL);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+ if (profiling_p)
+ {
+ prof_info.async = async;
+ prof_info.async_queue = prof_info.async;
+ }
+
if (num_waits)
{
va_list ap;
@@ -625,6 +1031,12 @@ GOACC_wait (int async, int num_waits, ...)
acc_wait_all ();
else
acc_wait_all_async (async);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
}
/* Legacy entry point (GCC 5). */