aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Schwinge <thomas@codesourcery.com>2019-06-21 10:40:38 -0700
committerSandra Loosemore <sloosemore@baylibre.com>2025-05-15 20:25:45 +0000
commit63caf6bc2f914518dbfcd242164f5e990982bdf9 (patch)
tree202ef2c9c7ae93980513f450a449c6c837db5dff
parent4566c9843f93d28f50dbe500b1b649a7731d1cd1 (diff)
downloadgcc-63caf6bc2f914518dbfcd242164f5e990982bdf9.zip
gcc-63caf6bc2f914518dbfcd242164f5e990982bdf9.tar.gz
gcc-63caf6bc2f914518dbfcd242164f5e990982bdf9.tar.bz2
Add changes to profiling interface from OG8 branch
This bundles up the parts of the profiling code from the OG8 branch that were not included in the upstream patch. libgomp/ChangeLog * Makefile.am (libgomp_la_SOURCES): Add oacc-profiling-acc_register_library.c. * Makefile.in: Regenerate. * libgomp.texi: Remove paragraph about acc_register_library. * oacc-init.c (get_property_any): Add profiling code. * oacc-parallel.c (GOACC_parallel_keyed_internal): Set device_api for profiling. * oacc-profiling-acc_register_library.c: New file. * oacc-profiling.c (goacc_profiling_initialize): Call acc_register_library. Avoid duplicate registration. (acc_register_library): Remove. * config/nvptx/oacc-profiling-acc_register_library.c: New empty file. * config/nvptx/oacc-profiling.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c: Remove call to acc_register_library. * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c: Likewise. Co-Authored-By: Maciej W. Rozycki <macro@codesourcery.com>
-rw-r--r--libgomp/Makefile.am2
-rw-r--r--libgomp/Makefile.in7
-rw-r--r--libgomp/config/nvptx/oacc-profiling-acc_register_library.c0
-rw-r--r--libgomp/config/nvptx/oacc-profiling.c0
-rw-r--r--libgomp/libgomp.texi8
-rw-r--r--libgomp/oacc-init.c21
-rw-r--r--libgomp/oacc-parallel.c2
-rw-r--r--libgomp/oacc-profiling-acc_register_library.c39
-rw-r--r--libgomp/oacc-profiling.c32
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c19
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c2
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c2
15 files changed, 100 insertions, 40 deletions
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index e3202ae..a2e531b 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -70,7 +70,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c oacc-profiling-acc_register_library.c
include $(top_srcdir)/plugin/Makefrag.am
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 2a0a842..b4a65a7 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -219,7 +219,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo oacc-profiling-acc_register_library.lo \
+ target-indirect.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +553,8 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c oacc-profiling-acc_register_library.c \
+ target-indirect.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -768,6 +770,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-mem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-parallel.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-plugin.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling-acc_register_library.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-profiling.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/oacc-target.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
diff --git a/libgomp/config/nvptx/oacc-profiling-acc_register_library.c b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/libgomp/config/nvptx/oacc-profiling-acc_register_library.c
diff --git a/libgomp/config/nvptx/oacc-profiling.c b/libgomp/config/nvptx/oacc-profiling.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/libgomp/config/nvptx/oacc-profiling.c
diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 6909c2b..d7d42d0 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -6377,14 +6377,6 @@ We just handle one case specially, as required by CUDA 9.0
@code{acc_ev_device_init_start}, @code{acc_ev_device_init_end}
callbacks.
-We're not yet implementing initialization via a
-@code{acc_register_library} function that is either statically linked
-in, or dynamically via @env{LD_PRELOAD}.
-Initialization via @code{acc_register_library} functions dynamically
-loaded via the @env{ACC_PROFLIB} environment variable does work, as
-does directly calling @code{acc_prof_register},
-@code{acc_prof_unregister}, @code{acc_prof_lookup}.
-
As currently there are no inquiry functions defined, calls to
@code{acc_prof_lookup} always returns @code{NULL}.
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 3856f85..5fb1bb8 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -810,6 +810,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
if (d == acc_device_current && thr && thr->dev)
return thr->dev->openacc.get_property_func (thr->dev->target_id, prop);
+ acc_prof_info prof_info;
+ acc_api_info api_info;
+ bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
+
+ if (profiling_p)
+ {
+ prof_info.device_type = d;
+ prof_info.device_number = ord;
+ }
+
gomp_mutex_lock (&acc_device_lock);
struct gomp_device_descr *dev = resolve_device (d, true);
@@ -830,7 +840,16 @@ get_property_any (int ord, acc_device_t d, acc_device_property_t prop)
assert (dev);
- return dev->openacc.get_property_func (dev->target_id, prop);
+ union goacc_property_value propval =
+ dev->openacc.get_property_func (dev->target_id, prop);
+
+ if (profiling_p)
+ {
+ thr->prof_info = NULL;
+ thr->api_info = NULL;
+ }
+
+ return propval;
}
size_t
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index a873830..a1fb11b 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -367,6 +367,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
fn (hostaddrs);
goto out_prof;
}
+ else if (profiling_p)
+ api_info.device_api = acc_device_api_cuda;
/* Default: let the runtime choose. */
for (i = 0; i != GOMP_DIM_MAX; i++)
diff --git a/libgomp/oacc-profiling-acc_register_library.c b/libgomp/oacc-profiling-acc_register_library.c
new file mode 100644
index 0000000..f6b482b
--- /dev/null
+++ b/libgomp/oacc-profiling-acc_register_library.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file provides a stub acc_register_library function. It's in a
+ separate file so that this function can easily be overridden when linking
+ statically. */
+
+#include "libgomp.h"
+#include "acc_prof.h"
+
+void
+acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
+ acc_prof_lookup_func lookup)
+{
+ gomp_debug (0, "dummy %s\n", __FUNCTION__); /* Stub: ignores REG, UNREG and LOOKUP; only emits a debug message.  */
+}
diff --git a/libgomp/oacc-profiling.c b/libgomp/oacc-profiling.c
index f98cc0a..d6cc9ce 100644
--- a/libgomp/oacc-profiling.c
+++ b/libgomp/oacc-profiling.c
@@ -104,7 +104,12 @@ goacc_profiling_initialize (void)
for (int i = 0; i < acc_ev_last; ++i)
goacc_prof_callbacks_enabled[i] = true;
-
+ /* We are to invoke an external acc_register_library routine, defaulting to
+ our stub oacc-profiling-acc_register_library.c:acc_register_library
+ implementation. */
+ gomp_debug (0, "%s: calling acc_register_library\n", __FUNCTION__);
+ //TODO.
+ acc_register_library (acc_prof_register, acc_prof_unregister, NULL);
#ifdef PLUGIN_SUPPORT
char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
@@ -141,10 +146,20 @@ goacc_profiling_initialize (void)
= dlsym (dl_handle, "acc_register_library");
if (a_r_l == NULL)
goto dl_fail;
- gomp_debug (0, " %s: calling %s:acc_register_library\n",
- __FUNCTION__, acc_proflib);
- a_r_l (acc_prof_register, acc_prof_unregister,
- acc_prof_lookup);
+ /* Avoid duplicate registration, for example if the same shared
+ library is specified in LD_PRELOAD and ACC_PROFLIB -- which
+ TAU 2.26 does when using "tau_exec -openacc". */
+ if (a_r_l != acc_register_library)
+ {
+ gomp_debug (0, " %s: calling %s:acc_register_library\n",
+ __FUNCTION__, acc_proflib);
+ //TODO.
+ a_r_l (acc_prof_register, acc_prof_unregister, NULL);
+ }
+ else
+ gomp_debug (0, " %s: skipping duplicate"
+ " %s:acc_register_library\n",
+ __FUNCTION__, acc_proflib);
}
else
{
@@ -487,13 +502,6 @@ acc_prof_lookup (const char *name)
return NULL;
}
-void
-acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
- acc_prof_lookup_func lookup)
-{
- gomp_fatal ("TODO");
-}
-
/* Prepare to dispatch events? */
bool
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
index d929bfd..a9a8c74 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-dispatch-1.c
@@ -114,8 +114,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
reg (acc_ev_compute_construct_start, cb_compute_construct_start_1, acc_reg);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
index b5e7715..91b3732 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
@@ -270,8 +270,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
index 2c85397..2cd2c98 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-kernels-1.c
@@ -59,6 +59,7 @@ static int state = -1;
static acc_device_t acc_device_type;
static int acc_device_num;
static int num_gangs, num_workers, vector_length;
+static int async;
static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info)
@@ -76,7 +77,7 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
assert (prof_info->device_type == acc_device_type);
assert (prof_info->device_number == acc_device_num);
assert (prof_info->thread_id == -1);
- assert (prof_info->async == acc_async_noval);
+ assert (prof_info->async == async);
assert (prof_info->async_queue == prof_info->async);
assert (prof_info->src_file == NULL);
assert (prof_info->func_name == NULL);
@@ -166,8 +167,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_enqueue_launch_start, cb_enqueue_launch_start, acc_reg);
assert (state == 0);
@@ -176,8 +175,10 @@ int main()
acc_device_num = acc_get_device_num (acc_device_type);
assert (state == 0);
- /* Parallelism dimensions: compiler/runtime decides. */
STATE_OP (state, = 0);
+ /* Implicit async. */
+ async = acc_async_noval;
+ /* Parallelism dimensions: compiler/runtime decides. */
num_gangs = num_workers = vector_length = 0;
{
#define N 100
@@ -203,8 +204,10 @@ int main()
#undef N
}
- /* Parallelism dimensions: literal. */
STATE_OP (state, = 0);
+ /* Explicit async: without argument. */
+ async = acc_async_noval;
+ /* Parallelism dimensions: literal. */
num_gangs = 30;
num_workers = 3;
vector_length = 5;
@@ -212,6 +215,7 @@ int main()
#define N 100
int x[N];
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
+ async \
num_gangs (30) num_workers (3) vector_length (5)
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
@@ -234,8 +238,10 @@ int main()
#undef N
}
- /* Parallelism dimensions: variable. */
STATE_OP (state, = 0);
+ /* Explicit async: variable. */
+ async = 123;
+ /* Parallelism dimensions: variable. */
num_gangs = 22;
num_workers = 5;
vector_length = 7;
@@ -243,6 +249,7 @@ int main()
#define N 100
int x[N];
#pragma acc kernels /* { dg-line l_compute[incr c_compute] } */ \
+ async (async) \
num_gangs (num_gangs) num_workers (num_workers) vector_length (vector_length)
/* { dg-note {OpenACC 'kernels' decomposition: variable 'i' declared in block requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
{ dg-note {variable 'i' made addressable} {} { target *-*-* } l_compute$c_compute } */
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index 9b4493d..27f86d3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -830,8 +830,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
STATE_OP (state, = 0);
reg (acc_ev_device_init_start, cb_device_init_start, acc_reg);
reg (acc_ev_device_init_end, cb_device_init_end, acc_reg);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
index 5b58c51..a723ad9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-valid_bytes-1.c
@@ -143,8 +143,6 @@ typedef struct E
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
A A1;
DEBUG_printf ("s=%zd, vb=%zd\n", sizeof A1, VALID_BYTES_A);
assert (VALID_BYTES_A <= sizeof A1);
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
index f537868..0f9e956 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-version-1.c
@@ -56,8 +56,6 @@ void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_look
int main()
{
- acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
-
ev_count = 0;
/* Trigger tests done in 'cb_*' functions. */