From 5fae049dc272144f8e61af94ee0ba42b270915e5 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 17 May 2019 21:13:36 +0200 Subject: OpenACC Profiling Interface (incomplete) libgomp/ * acc_prof.h: New file. * oacc-profiling.c: Likewise. * Makefile.am (nodist_libsubinclude_HEADERS, libgomp_la_SOURCES): Add these, respectively. * Makefile.in: Regenerate. * env.c (initialize_env): Call goacc_profiling_initialize. * oacc-plugin.c (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): New functions. * oacc-plugin.h (GOMP_PLUGIN_goacc_thread) (GOMP_PLUGIN_goacc_profiling_dispatch): Declare. * libgomp.map (OACC_2.5.1): Add acc_prof_lookup, acc_prof_register, acc_prof_unregister, and acc_register_library. (GOMP_PLUGIN_1.3): Add GOMP_PLUGIN_goacc_profiling_dispatch, and GOMP_PLUGIN_goacc_thread. * oacc-int.h (struct goacc_thread): Add prof_info, api_info, prof_callbacks_enabled members. (goacc_prof_enabled, goacc_profiling_initialize) (_goacc_profiling_dispatch_p, _goacc_profiling_setup_p) (goacc_profiling_dispatch): Declare. (GOACC_PROF_ENABLED, GOACC_PROFILING_DISPATCH_P) (GOACC_PROFILING_SETUP_P): Define. * oacc-async.c (acc_async_test, acc_async_test_all, acc_wait) (acc_wait_async, acc_wait_all, acc_wait_all_async): Update for OpenACC Profiling Interface. * oacc-cuda.c (acc_get_current_cuda_device) (acc_get_current_cuda_context, acc_get_cuda_stream) (acc_set_cuda_stream): Likewise. * oacc-init.c (acc_init_1, goacc_attach_host_thread_to_device) (acc_init, acc_set_device_type, acc_get_device_type) (acc_get_device_num, goacc_lazy_initialize): Likewise. * oacc-mem.c (acc_malloc, acc_free, memcpy_tofrom_device) (acc_deviceptr, acc_hostptr, acc_is_present, acc_map_data) (acc_unmap_data, present_create_copy, delete_copyout) (update_dev_host): Likewise. * oacc-parallel.c (GOACC_parallel_keyed, GOACC_data_start) (GOACC_data_end, GOACC_enter_exit_data, GOACC_update, GOACC_wait): Likewise. * plugin/plugin-nvptx.c (nvptx_exec, nvptx_alloc, nvptx_free) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Likewise. * libgomp.texi: Update. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: New file. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. From-SVN: r271346 --- libgomp/oacc-mem.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) (limited to 'libgomp/oacc-mem.c') diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 03df0d4..0f3832f 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -108,7 +108,19 @@ acc_malloc (size_t s) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return malloc (s); - return thr->dev->alloc_func (thr->dev->target_id, s); + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + + void *res = thr->dev->alloc_func (thr->dev->target_id, s); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } + + return res; } /* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event @@ -131,6 +143,10 @@ acc_free (void *d) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return free (d); + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + gomp_mutex_lock (&acc_dev->lock); /* We don't have to call lazy open here, as the ptr value must have @@ -151,6 +167,12 @@ acc_free (void *d) if (!acc_dev->free_func (acc_dev->target_id, d)) gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } static void @@ -172,11 +194,26 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, return; } + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + goacc_aq aq = get_goacc_asyncqueue (async); if (from) gomp_copy_dev2host (thr->dev, aq, h, d, s); else gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } void @@ -221,6 +258,9 @@ acc_deviceptr (void *h) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return h; + /* In the following, no OpenACC Profiling Interface events can possibly be + generated. */ + gomp_mutex_lock (&dev->lock); n = lookup_host (dev, h, 1); @@ -258,6 +298,9 @@ acc_hostptr (void *d) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return d; + /* In the following, no OpenACC Profiling Interface events can possibly be + generated. */ + gomp_mutex_lock (&acc_dev->lock); n = lookup_dev (acc_dev->openacc.data_environ, d, 1); @@ -295,6 +338,9 @@ acc_is_present (void *h, size_t s) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return h != NULL; + /* In the following, no OpenACC Profiling Interface events can possibly be + generated. */ + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, s); @@ -339,6 +385,10 @@ acc_map_data (void *h, void *d, size_t s) gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map", (void *)h, (int)s, (void *)d, (int)s); + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + gomp_mutex_lock (&acc_dev->lock); if (lookup_host (acc_dev, h, s)) @@ -360,6 +410,12 @@ acc_map_data (void *h, void *d, size_t s) tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes, &kinds, true, GOMP_MAP_VARS_OPENACC); tgt->list[0].key->refcount = REFCOUNT_INFINITY; + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } gomp_mutex_lock (&acc_dev->lock); @@ -380,6 +436,10 @@ acc_unmap_data (void *h) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + size_t host_size; gomp_mutex_lock (&acc_dev->lock); @@ -433,6 +493,12 @@ acc_unmap_data (void *h) gomp_mutex_unlock (&acc_dev->lock); gomp_unmap_vars (t, true); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } #define FLAG_PRESENT (1 << 0) @@ -456,6 +522,15 @@ present_create_copy (unsigned f, void *h, size_t s, int async) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return h; + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, s); @@ -518,6 +593,12 @@ present_create_copy (unsigned f, void *h, size_t s, int async) gomp_mutex_unlock (&acc_dev->lock); } + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } + return d; } @@ -599,6 +680,15 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, s); @@ -672,6 +762,12 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) } gomp_mutex_unlock (&acc_dev->lock); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } void @@ -737,6 +833,15 @@ update_dev_host (int is_dev, void *h, size_t s, int async) if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; + acc_prof_info prof_info; + acc_api_info api_info; + bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info); + if (profiling_p) + { + prof_info.async = async; + prof_info.async_queue = prof_info.async; + } + gomp_mutex_lock (&acc_dev->lock); n = lookup_host (acc_dev, h, s); @@ -758,6 +863,12 @@ update_dev_host (int is_dev, void *h, size_t s, int async) gomp_copy_dev2host (acc_dev, aq, h, d, s); gomp_mutex_unlock (&acc_dev->lock); + + if (profiling_p) + { + thr->prof_info = NULL; + thr->api_info = NULL; + } } void -- cgit v1.1