aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Schwinge <thomas@codesourcery.com>2021-07-23 15:07:34 +0200
committerThomas Schwinge <thomas@codesourcery.com>2021-07-27 11:16:25 +0200
commit29ddaf43f70e19fd1110b539e8b3d0436c757e34 (patch)
tree658c817c49661752eebdfdcee96fbcaa3fe10f63
parent599e275d7e0b3fb79ff704d4cb2d8fdb0231116e (diff)
downloadgcc-29ddaf43f70e19fd1110b539e8b3d0436c757e34.zip
gcc-29ddaf43f70e19fd1110b539e8b3d0436c757e34.tar.gz
gcc-29ddaf43f70e19fd1110b539e8b3d0436c757e34.tar.bz2
[OpenACC] Clarify sequencing of 'async' data copying vs. profiling events in 'libgomp.oacc-c-c++-common/acc_prof-{init,parallel}-1.c'
... as noticed with GCN offloading. Fix-up for r271346 (commit 5fae049dc272144f8e61af94ee0ba42b270915e5) "OpenACC Profiling Interface (incomplete)". libgomp/ * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c: Clarify sequencing of 'async' data copying vs. profiling events. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Likewise.
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c49
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c51
2 files changed, 68 insertions, 32 deletions
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
index 7d05f48..b5e7715 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
@@ -19,6 +19,19 @@
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
+static acc_prof_reg reg;
+static acc_prof_reg unreg;
+static acc_prof_lookup_func lookup;
+void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
+{
+ DEBUG_printf ("%s\n", __FUNCTION__);
+
+ reg = reg_;
+ unreg = unreg_;
+ lookup = lookup_;
+}
+
+
static int state = -1;
#define STATE_OP(state, op) \
@@ -34,7 +47,7 @@ static int state = -1;
static acc_device_t acc_device_type;
static int acc_device_num;
-static int acc_async;
+static int acc_async = acc_async_sync;
struct tool_info
@@ -192,6 +205,21 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
assert (state == 11
|| state == 111);
+#if defined COPYIN
+ /* In an 'async' setting, this event may be triggered before actual 'async'
+ data copying has completed. Given that 'state' appears in 'COPYIN', we
+ first have to synchronize (that is, let the 'async' 'COPYIN' read the
+ current 'state' value)... */
+ if (acc_async != acc_async_sync)
+ {
+ /* "We're not yet accounting for the fact that _OpenACC events may occur
+ during event processing_"; temporarily disable to avoid deadlock. */
+ unreg (acc_ev_none, NULL, acc_toggle_per_thread);
+ acc_wait (acc_async);
+ reg (acc_ev_none, NULL, acc_toggle_per_thread);
+ }
+ /* ... before modifying it in the following. */
+#endif
STATE_OP (state, ++);
assert (tool_info != NULL);
@@ -240,19 +268,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
}
-static acc_prof_reg reg;
-static acc_prof_reg unreg;
-static acc_prof_lookup_func lookup;
-void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
-{
- DEBUG_printf ("%s\n", __FUNCTION__);
-
- reg = reg_;
- unreg = unreg_;
- lookup = lookup_;
-}
-
-
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
@@ -271,14 +286,15 @@ int main()
acc_device_type = acc_get_device_type ();
acc_device_num = acc_get_device_num (acc_device_type);
- acc_async = 12;
{
int state_init;
+ acc_async = 12;
#pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
{
state_init = state;
}
+ acc_async = acc_async_sync;
#pragma acc wait
assert (state_init == 11);
}
@@ -297,14 +313,15 @@ int main()
acc_device_type = acc_get_device_type ();
acc_device_num = acc_get_device_num (acc_device_type);
- acc_async = 12;
{
int state_init;
+ acc_async = 12;
#pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
{
state_init = state;
}
+ acc_async = acc_async_sync;
#pragma acc wait
assert (state_init == 111);
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index a5e9ab3..1f50386 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -29,6 +29,19 @@
#define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
+static acc_prof_reg reg;
+static acc_prof_reg unreg;
+static acc_prof_lookup_func lookup;
+void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
+{
+ DEBUG_printf ("%s\n", __FUNCTION__);
+
+ reg = reg_;
+ unreg = unreg_;
+ lookup = lookup_;
+}
+
+
static int state = -1;
#define STATE_OP(state, op) \
@@ -44,7 +57,7 @@ static int state = -1;
static acc_device_t acc_device_type;
static int acc_device_num;
-static int acc_async;
+static int acc_async = acc_async_sync;
struct tool_info
@@ -235,6 +248,25 @@ static void cb_enter_data_end (acc_prof_info *prof_info, acc_event_info *event_i
assert (state == 4
|| state == 104);
+#if defined COPYIN
+ /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying,
+ before 'acc_ev_enqueue_launch_start' marks invoking the compute region.
+ That's the 'state_init = state;' intended to be captured in the compute
+ regions. */
+ /* In an 'async' setting, this event may be triggered before actual 'async'
+ data copying has completed. Given that 'state' appears in 'COPYIN', we
+ first have to synchronize (that is, let the 'async' 'COPYIN' read the
+ current 'state' value)... */
+ if (acc_async != acc_async_sync)
+ {
+ /* "We're not yet accounting for the fact that _OpenACC events may occur
+ during event processing_"; temporarily disable to avoid deadlock. */
+ unreg (acc_ev_none, NULL, acc_toggle_per_thread);
+ acc_wait (acc_async);
+ reg (acc_ev_none, NULL, acc_toggle_per_thread);
+ }
+ /* ... before modifying it in the following. */
+#endif
STATE_OP (state, ++);
assert (tool_info != NULL);
@@ -664,19 +696,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
}
-static acc_prof_reg reg;
-static acc_prof_reg unreg;
-static acc_prof_lookup_func lookup;
-void acc_register_library (acc_prof_reg reg_, acc_prof_reg unreg_, acc_prof_lookup_func lookup_)
-{
- DEBUG_printf ("%s\n", __FUNCTION__);
-
- reg = reg_;
- unreg = unreg_;
- lookup = lookup_;
-}
-
-
int main()
{
acc_register_library (acc_prof_register, acc_prof_unregister, acc_prof_lookup);
@@ -696,7 +715,6 @@ int main()
acc_device_type = acc_get_device_type ();
acc_device_num = acc_get_device_num (acc_device_type);
- acc_async = acc_async_sync;
assert (state == 0);
{
@@ -713,15 +731,16 @@ int main()
STATE_OP (state, = 100);
- acc_async = 12;
{
int state_init;
+ acc_async = 12;
#pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
{
asm volatile ("" : : : "memory"); // TODO PR90488
state_init = state;
}
+ acc_async = acc_async_sync;
#pragma acc wait
assert (state_init == 104);
}