diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2019-05-17 21:13:36 +0200 |
---|---|---|
committer | Thomas Schwinge <tschwinge@gcc.gnu.org> | 2019-05-17 21:13:36 +0200 |
commit | 5fae049dc272144f8e61af94ee0ba42b270915e5 (patch) | |
tree | 6108e18ecb0a9f9190bb4d03730bba8cd80654f1 /libgomp/oacc-parallel.c | |
parent | b48f44bf77a39fefc238a16cf1225c6464c82406 (diff) | |
download | gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.zip gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.tar.gz gcc-5fae049dc272144f8e61af94ee0ba42b270915e5.tar.bz2 |
OpenACC Profiling Interface (incomplete)
libgomp/
* acc_prof.h: New file.
* oacc-profiling.c: Likewise.
* Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES):
Add these, respectively.
* Makefile.in: Regenerate.
* env.c (initialize_env): Call goacc_profiling_initialize.
* oacc-plugin.c (GOMP_PLUGIN_goacc_thread)
(GOMP_PLUGIN_goacc_profiling_dispatch): New functions.
* oacc-plugin.h (GOMP_PLUGIN_goacc_thread)
(GOMP_PLUGIN_goacc_profiling_dispatch): Declare.
* libgomp.map (OACC_2.5.1): Add acc_prof_lookup,
acc_prof_register, acc_prof_unregister, and acc_register_library.
(GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and
GOMP_PLUGIN_goacc_thread.
* oacc-int.h (struct goacc_thread): Add prof_info, api_info,
prof_callbacks_enabled members.
(goacc_prof_enabled, goacc_profiling_initialize)
(_goacc_profiling_dispatch_p, _goacc_profiling_setup_p)
(goacc_profiling_dispatch): Declare.
(GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P)
(GOACC_PROFILING_SETUP_P): Define.
* oacc-async.c (acc_async_test, acc_async_test_all, acc_wait)
(acc_wait_async, acc_wait_all, acc_wait_all_async): Update for
OpenACC Profiling Interface.
* oacc-cuda.c (acc_get_current_cuda_device)
(acc_get_current_cuda_context, acc_get_cuda_stream)
(acc_set_cuda_stream): Likewise.
* oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device)
(acc_init, acc_set_device_type, acc_get_device_type)
(acc_get_device_num, goacc_lazy_initialize): Likewise.
* oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device)
(acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data)
(acc_unmap_data, present_create_copy, delete_copyout)
(update_dev_host): Likewise.
* oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start)
(GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait):
Likewise.
* plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free)
(GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
Likewise.
* libgomp.texi: Update.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New
file.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c:
Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c:
Likewise.
From-SVN: r271346
Diffstat (limited to 'libgomp/oacc-parallel.c')
-rw-r--r-- | libgomp/oacc-parallel.c | 460 |
1 file changed, 436 insertions, 24 deletions
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index fa99a2a..e56330f 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -152,21 +152,75 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), thr = goacc_thread (); acc_dev = thr->dev; + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type = acc_ev_compute_construct_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = async; + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info compute_construct_event_info; + if (profiling_p) + { + compute_construct_event_info.other_event.event_type + = prof_info.event_type; + compute_construct_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + compute_construct_event_info.other_event.parent_construct + = acc_construct_parallel; + compute_construct_event_info.other_event.implicit = 0; + compute_construct_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); /* Host fallback if "if" clause is false or if the current device is 
set to the host. */ if (flags & GOACC_FLAG_HOST_FALLBACK) { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; goacc_save_and_set_bind (acc_device_host); fn (hostaddrs); goacc_restore_bind (); - return; + goto out_prof; } else if (acc_device_type (acc_dev->type) == acc_device_host) { fn (hostaddrs); - return; + goto out_prof; } /* Default: let the runtime choose. */ @@ -200,6 +254,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); + + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + break; } @@ -233,10 +294,34 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), else tgt_fn = (void (*)) fn; + acc_event_info enter_exit_data_event_info; + if (profiling_p) + { + prof_info.event_type = acc_ev_enter_data_start; + enter_exit_data_event_info.other_event.event_type + = prof_info.event_type; + enter_exit_data_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + enter_exit_data_event_info.other_event.parent_construct + = compute_construct_event_info.other_event.parent_construct; + enter_exit_data_event_info.other_event.implicit = 1; + enter_exit_data_event_info.other_event.tool_info = NULL; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + } + goacc_aq aq = get_goacc_asyncqueue (async); tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); + if (profiling_p) + { + prof_info.event_type = acc_ev_enter_data_end; + enter_exit_data_event_info.other_event.event_type + = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + } devaddrs = gomp_alloca (sizeof (void *) * mapnum); for (i = 0; i < mapnum; i++) @@ -244,17 +329,46 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), + tgt->list[i].key->tgt_offset + tgt->list[i].offset); if (aq == NULL) + 
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, + tgt); + else + acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, + dims, tgt, aq); + + if (profiling_p) { - acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, - dims, tgt); - /* If running synchronously, unmap immediately. */ - gomp_unmap_vars (tgt, true); + prof_info.event_type = acc_ev_exit_data_start; + enter_exit_data_event_info.other_event.event_type = prof_info.event_type; + enter_exit_data_event_info.other_event.tool_info = NULL; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } + + /* If running synchronously, unmap immediately. */ + if (aq == NULL) + gomp_unmap_vars (tgt, true); else + gomp_unmap_vars_async (tgt, true, aq); + + if (profiling_p) { - acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, - dims, tgt, aq); - gomp_unmap_vars_async (tgt, true, aq); + prof_info.event_type = acc_ev_exit_data_end; + enter_exit_data_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + } + + out_prof: + if (profiling_p) + { + prof_info.event_type = acc_ev_compute_construct_end; + compute_construct_event_info.other_event.event_type + = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; } } @@ -293,16 +407,83 @@ GOACC_data_start (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type = acc_ev_enter_data_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = 
acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = acc_async_sync; /* Always synchronous. */ + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info enter_data_event_info; + if (profiling_p) + { + enter_data_event_info.other_event.event_type + = prof_info.event_type; + enter_data_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + enter_data_event_info.other_event.parent_construct = acc_construct_data; + for (int i = 0; i < mapnum; ++i) + if ((kinds[i] & 0xff) == GOMP_MAP_USE_DEVICE_PTR) + { + /* If there is one such data mapping kind, then this is actually an + OpenACC 'host_data' construct. (GCC maps the OpenACC + 'host_data' construct to the OpenACC 'data' construct.) Apart + from artificial test cases (such as an OpenACC 'host_data' + construct's (implicit) device initialization when there hasn't + been any device data be set up before...), there can't really + any meaningful events be generated from OpenACC 'host_data' + constructs, though. */ + enter_data_event_info.other_event.parent_construct + = acc_construct_host_data; + break; + } + enter_data_event_info.other_event.implicit = 0; + enter_data_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); + /* Host fallback or 'do nothing'. 
*/ if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, GOMP_MAP_VARS_OPENACC); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; - return; + goto out_prof; } gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); @@ -311,18 +492,90 @@ GOACC_data_start (int flags_m, size_t mapnum, gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); tgt->prev = thr->mapped_data; thr->mapped_data = tgt; + + out_prof: + if (profiling_p) + { + prof_info.event_type = acc_ev_enter_data_end; + enter_data_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &enter_data_event_info, &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; + } } void GOACC_data_end (void) { struct goacc_thread *thr = goacc_thread (); + struct gomp_device_descr *acc_dev = thr->dev; struct target_mem_desc *tgt = thr->mapped_data; + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type = acc_ev_exit_data_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = acc_async_sync; /* Always synchronous. 
*/ + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info exit_data_event_info; + if (profiling_p) + { + exit_data_event_info.other_event.event_type + = prof_info.event_type; + exit_data_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + exit_data_event_info.other_event.parent_construct = acc_construct_data; + exit_data_event_info.other_event.implicit = 0; + exit_data_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); + gomp_debug (0, " %s: restore mappings\n", __FUNCTION__); thr->mapped_data = tgt->prev; gomp_unmap_vars (tgt, true); gomp_debug (0, " %s: mappings restored\n", __FUNCTION__); + + if (profiling_p) + { + prof_info.event_type = acc_ev_exit_data_end; + exit_data_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &exit_data_event_info, &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; + } } void @@ -342,19 +595,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, thr = goacc_thread (); acc_dev = thr->dev; - if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - || (flags & GOACC_FLAG_HOST_FALLBACK)) - return; - - if (num_waits) - { - va_list ap; - - va_start (ap, num_waits); - goacc_wait (async, num_waits, &ap); - va_end (ap); - } - /* Determine whether "finalize" semantics apply to all mappings of this OpenACC directive. 
*/ bool finalize = false; @@ -394,6 +634,77 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, kind); } + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type + = data_enter ? acc_ev_enter_data_start : acc_ev_exit_data_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = async; + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info enter_exit_data_event_info; + if (profiling_p) + { + enter_exit_data_event_info.other_event.event_type + = prof_info.event_type; + enter_exit_data_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + enter_exit_data_event_info.other_event.parent_construct + = data_enter ? 
acc_construct_enter_data : acc_construct_exit_data; + enter_exit_data_event_info.other_event.implicit = 0; + enter_exit_data_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + || (flags & GOACC_FLAG_HOST_FALLBACK)) + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + + goto out_prof; + } + + if (num_waits) + { + va_list ap; + + va_start (ap, num_waits); + goacc_wait (async, num_waits, &ap); + va_end (ap); + } + /* In c, non-pointers and arrays are represented by a single data clause. Dynamically allocated arrays and subarrays are represented by a data clause followed by an internal GOMP_MAP_POINTER. @@ -486,6 +797,19 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, i += pointer - 1; } } + + out_prof: + if (profiling_p) + { + prof_info.event_type + = data_enter ? 
acc_ev_enter_data_end : acc_ev_exit_data_end; + enter_exit_data_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; + } } static void @@ -534,9 +858,64 @@ GOACC_update (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); + + acc_prof_info prof_info; + if (profiling_p) + { + thr->prof_info = &prof_info; + + prof_info.event_type = acc_ev_update_start; + prof_info.valid_bytes = _ACC_PROF_INFO_VALID_BYTES; + prof_info.version = _ACC_PROF_INFO_VERSION; + prof_info.device_type = acc_device_type (acc_dev->type); + prof_info.device_number = acc_dev->target_id; + prof_info.thread_id = -1; + prof_info.async = async; + prof_info.async_queue = prof_info.async; + prof_info.src_file = NULL; + prof_info.func_name = NULL; + prof_info.line_no = -1; + prof_info.end_line_no = -1; + prof_info.func_line_no = -1; + prof_info.func_end_line_no = -1; + } + acc_event_info update_event_info; + if (profiling_p) + { + update_event_info.other_event.event_type + = prof_info.event_type; + update_event_info.other_event.valid_bytes + = _ACC_OTHER_EVENT_INFO_VALID_BYTES; + update_event_info.other_event.parent_construct = acc_construct_update; + update_event_info.other_event.implicit = 0; + update_event_info.other_event.tool_info = NULL; + } + acc_api_info api_info; + if (profiling_p) + { + thr->api_info = &api_info; + + api_info.device_api = acc_device_api_none; + api_info.valid_bytes = _ACC_API_INFO_VALID_BYTES; + api_info.device_type = prof_info.device_type; + api_info.vendor = -1; + api_info.device_handle = NULL; + api_info.context_handle = NULL; + api_info.async_handle = NULL; + } + + if (profiling_p) + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || 
(flags & GOACC_FLAG_HOST_FALLBACK)) - return; + { + prof_info.device_type = acc_device_host; + api_info.device_type = prof_info.device_type; + + goto out_prof; + } if (num_waits) { @@ -608,11 +987,38 @@ GOACC_update (int flags_m, size_t mapnum, break; } } + + out_prof: + if (profiling_p) + { + prof_info.event_type = acc_ev_update_end; + update_event_info.other_event.event_type = prof_info.event_type; + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + + thr->prof_info = NULL; + thr->api_info = NULL; + } } void GOACC_wait (int async, int num_waits, ...) { + goacc_lazy_initialize (); + + struct goacc_thread *thr = goacc_thread (); + + /* No nesting. */ + assert (thr->prof_info == NULL); + assert (thr->api_info == NULL); + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + if (num_waits) { va_list ap; @@ -625,6 +1031,12 @@ GOACC_wait (int async, int num_waits, ...) acc_wait_all (); else acc_wait_all_async (async); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } /* Legacy entry point (GCC 5). */ |