diff options
author | Julian Brown <julian@codesourcery.com> | 2019-09-10 20:34:45 -0700 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2020-03-03 12:51:25 +0100 |
commit | 3e9e8cccbbbf64fa929d06e5629680fb2bb41b9a (patch) | |
tree | 16fc5e1cec3eae96f56ee2fbde3c2bfb0dff2abd | |
parent | 28378bcabf080562ade1f97e2f78f93d05df3077 (diff) | |
download | gcc-3e9e8cccbbbf64fa929d06e5629680fb2bb41b9a.zip gcc-3e9e8cccbbbf64fa929d06e5629680fb2bb41b9a.tar.gz gcc-3e9e8cccbbbf64fa929d06e5629680fb2bb41b9a.tar.bz2 |
[og9] OpenACC profiling-interface fixes for asynchronous operations
libgomp/
* oacc-host.c (host_openacc_async_queue_callback): Invoke callback
function immediately.
* oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch,
queue_async_prof_dispatch): New.
(GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous
profile-event dispatches.
(GOACC_enter_exit_data): Likewise.
(GOACC_update): Likewise.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c
(cb_compute_construct_start): Remove/fix TODO.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
(cb_exit_data_start): Tweak expected state values.
(cb_exit_data_end): Likewise.
(cb_compute_construct_start): Remove/fix TODO.
(cb_compute_construct_end): Don't do adjustments for
acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks.
(cb_compute_construct_end): Tweak expected state values.
(cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect
launch-enqueue operations to happen synchronously with respect to
profiling events on async streams.
(main): Tweak expected state values.
* testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder
operations for async-safety.
(cherry picked from openacc-gcc-9-branch commit
9bd8ebbc1a1e7a3263571105b3c1237080e9eea7)
-rw-r--r-- | libgomp/ChangeLog.omp | 26 | ||||
-rw-r--r-- | libgomp/oacc-host.c | 5 | ||||
-rw-r--r-- | libgomp/oacc-parallel.c | 181 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c | 5 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c | 64 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c | 4 |
6 files changed, 197 insertions, 88 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 41e05e9..5f39fae 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,5 +1,31 @@ 2019-09-17 Julian Brown <julian@codesourcery.com> + * oacc-host.c (host_openacc_async_queue_callback): Invoke callback + function immediately. + * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch, + queue_async_prof_dispatch): New. + (GOACC_parallel_keyed): Call queue_async_prof_dispatch for asynchronous + profile-event dispatches. + (GOACC_enter_exit_data): Likewise. + (GOACC_update): Likewise. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c + (cb_compute_construct_start): Remove/fix TODO. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c + (cb_exit_data_start): Tweak expected state values. + (cb_exit_data_end): Likewise. + (cb_compute_construct_start): Remove/fix TODO. + (cb_compute_construct_end): Don't do adjustments for + acc_ev_enqueue_launch_start/acc_ev_enqueue_launch_end callbacks. + (cb_compute_construct_end): Tweak expected state values. + (cb_enqueue_launch_start, cb_enqueue_launch_end): Don't expect + launch-enqueue operations to happen synchronously with respect to + profiling events on async streams. + (main): Tweak expected state values. + * testsuite/libgomp.oacc-c-c++-common/lib-94.c (main): Reorder + operations for async-safety. + +2019-09-17 Julian Brown <julian@codesourcery.com> + * target.c (gomp_map_vars_internal): Remove read of uninitialised data. diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 21f7330..0231b59 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -250,10 +250,9 @@ host_openacc_async_dev2host (int ord __attribute__ ((unused)), static void host_openacc_async_queue_callback (struct goacc_asyncqueue *aq __attribute__ ((unused)), - void (*callback_fn)(void *) - __attribute__ ((unused)), - void *userptr __attribute__ ((unused))) + void (*callback_fn)(void *), void *userptr) { + callback_fn (userptr); } static struct goacc_asyncqueue * diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 1bd0775..0c9cb3c 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -169,6 +169,62 @@ goacc_call_host_fn (void (*fn) (void *), size_t mapnum, void **hostaddrs, fn (hostaddrs); } +struct async_prof_callback_info { + acc_prof_info prof_info; + acc_event_info event_info; + acc_api_info api_info; + struct async_prof_callback_info *start_info; +}; + +static void +async_prof_dispatch (void *ptr) +{ + struct async_prof_callback_info *info + = (struct async_prof_callback_info *) ptr; + + if (info->start_info) + { + /* The TOOL_INFO must be preserved from a start event to the + corresponding end event. Copy that here. */ + void *tool_info = info->start_info->event_info.other_event.tool_info; + info->event_info.other_event.tool_info = tool_info; + } + + goacc_profiling_dispatch (&info->prof_info, &info->event_info, + &info->api_info); + + /* The async_prof_dispatch function is (so far) always used for start/end + profiling event pairs: the start and end parts are queued, then each is + dispatched (or the dispatches might be interleaved before the end part is + queued). + In any case, it's not safe to delete either info structure before the + whole bracketed event is complete. */ + + if (info->start_info) + { + free (info->start_info); + free (info); + } +} + +static struct async_prof_callback_info * +queue_async_prof_dispatch (struct gomp_device_descr *devicep, goacc_aq aq, + acc_prof_info *prof_info, acc_event_info *event_info, + acc_api_info *api_info, + struct async_prof_callback_info *prev_info) +{ + struct async_prof_callback_info *info = malloc (sizeof (*info)); + + info->prof_info = *prof_info; + info->event_info = *event_info; + info->api_info = *api_info; + info->start_info = prev_info; + + devicep->openacc.async.queue_callback_func (aq, async_prof_dispatch, + (void *) info); + return info; +} + /* Launch a possibly offloaded function with FLAGS. FN is the host fn address. MAPNUM, HOSTADDRS, SIZES & KINDS describe the memory blocks to be copied to/from the device. Varadic arguments are @@ -194,6 +250,8 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, unsigned dims[GOMP_DIM_MAX]; unsigned tag; bool args_exploded = false; + struct async_prof_callback_info *comp_start_info = NULL, + *data_start_info = NULL; #ifdef HAVE_INTTYPES_H gomp_debug (0, "%s: mapnum=%"PRIu64", hostaddrs=%p, size=%p, kinds=%p\n", @@ -255,10 +313,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, api_info.async_handle = NULL; } - if (profiling_p) - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); - handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds); /* Default: let the runtime choose. */ @@ -294,11 +348,12 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, if (async == GOMP_LAUNCH_OP_MAX) async = va_arg (ap, unsigned); - if (profiling_p) - { - prof_info.async = async; - prof_info.async_queue = prof_info.async; - } + /* Set async number in profiling data, unless the device is the + host or we're doing host fallback. */ + if (profiling_p + && !(flags & GOACC_FLAG_HOST_FALLBACK) + && acc_device_type (acc_dev->type) != acc_device_host) + prof_info.async = prof_info.async_queue = async; break; } @@ -321,6 +376,20 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, } va_end (ap); + goacc_aq aq = get_goacc_asyncqueue (async); + + if (profiling_p) + { + if (aq) + comp_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &compute_construct_event_info, + &api_info, NULL); + else + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); + } + /* Host fallback if "if" clause is false or if the current device is set to the host. */ if (flags & GOACC_FLAG_HOST_FALLBACK) @@ -368,12 +437,16 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, = compute_construct_event_info.other_event.parent_construct; enter_exit_data_event_info.other_event.implicit = 1; enter_exit_data_event_info.other_event.tool_info = NULL; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } - goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC); @@ -391,8 +464,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, prof_info.event_type = acc_ev_enter_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } devaddrs = gomp_alloca (sizeof (void *) * mapnum); @@ -423,8 +501,14 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, prof_info.event_type = acc_ev_exit_data_start; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; enter_exit_data_event_info.other_event.tool_info = NULL; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } /* If running synchronously, unmap immediately. */ @@ -437,8 +521,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, { prof_info.event_type = acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); } #ifdef RC_CHECKING @@ -453,8 +542,13 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), size_t mapnum, prof_info.event_type = acc_ev_compute_construct_end; compute_construct_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &compute_construct_event_info, &api_info, + comp_start_info); + else + goacc_profiling_dispatch (&prof_info, &compute_construct_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; @@ -697,6 +791,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, struct gomp_device_descr *acc_dev; bool data_enter = false; size_t i; + struct async_prof_callback_info *data_start_info = NULL; goacc_lazy_initialize (); @@ -806,9 +901,19 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, api_info.async_handle = NULL; } + goacc_aq aq = get_goacc_asyncqueue (async); + if (profiling_p) - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + { + if (aq) + data_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + NULL); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); + } if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -867,7 +972,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, case GOMP_MAP_STRUCT: { int elems = sizes[i]; - goacc_aq aq = get_goacc_asyncqueue (async); gomp_map_vars_async (acc_dev, aq, elems + 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); @@ -890,7 +994,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, &sizes[i], &kinds[i]); else { - goacc_aq aq = get_goacc_asyncqueue (async); for (int j = 0; j < 2; j++) gomp_map_vars_async (acc_dev, aq, (j == 0 || pointer == 2) ? 1 : 2, @@ -1003,7 +1106,6 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, case GOMP_MAP_STRUCT: { int elems = sizes[i]; - goacc_aq aq = get_goacc_asyncqueue (async); for (int j = 1; j <= elems; j++) { struct splay_tree_key_s k; @@ -1067,8 +1169,13 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, prof_info.event_type = data_enter ? acc_ev_enter_data_end : acc_ev_exit_data_end; enter_exit_data_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, - &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &enter_exit_data_event_info, &api_info, + data_start_info); + else + goacc_profiling_dispatch (&prof_info, &enter_exit_data_event_info, + &api_info); thr->prof_info = NULL; thr->api_info = NULL; @@ -1120,6 +1227,8 @@ GOACC_update (int flags_m, size_t mapnum, struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; + goacc_aq aq = NULL; + struct async_prof_callback_info *update_start_info = NULL; bool profiling_p = GOACC_PROFILING_DISPATCH_P (true); @@ -1169,7 +1278,15 @@ GOACC_update (int flags_m, size_t mapnum, } if (profiling_p) - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + { + aq = get_goacc_asyncqueue (async); + if (aq) + update_start_info + = queue_async_prof_dispatch (acc_dev, aq, &prof_info, + &update_event_info, &api_info, NULL); + else + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + } if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || (flags & GOACC_FLAG_HOST_FALLBACK)) @@ -1257,7 +1374,11 @@ GOACC_update (int flags_m, size_t mapnum, { prof_info.event_type = acc_ev_update_end; update_event_info.other_event.event_type = prof_info.event_type; - goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); + if (aq) + queue_async_prof_dispatch (acc_dev, aq, &prof_info, &update_event_info, + &api_info, update_start_info); + else + goacc_profiling_dispatch (&prof_info, &update_event_info, &api_info); thr->prof_info = NULL; thr->api_info = NULL; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c index cf980f1..1af53cb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-init-1.c @@ -159,7 +159,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == /* TODO acc_async */ acc_async_sync); + if (acc_device_type == acc_device_host) + assert (prof_info->async == acc_async_sync); + else + assert (prof_info->async == acc_async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 5d39251..0cb03691 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -284,9 +284,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 7 + assert (state == 5 #if ASYNC_EXIT_DATA - || state == 107 + || state == 105 #endif ); STATE_OP (state, ++); @@ -340,9 +340,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 8 + assert (state == 6 #if ASYNC_EXIT_DATA - || state == 108 + || state == 106 #endif ); STATE_OP (state, ++); @@ -426,7 +426,10 @@ static void cb_compute_construct_start (acc_prof_info *prof_info, acc_event_info assert (prof_info->device_type == acc_device_type); assert (prof_info->device_number == acc_device_num); assert (prof_info->thread_id == -1); - assert (prof_info->async == /* TODO acc_async */ acc_async_sync); + if (acc_device_type == acc_device_host) + assert (prof_info->async == acc_async_sync); + else + assert (prof_info->async == acc_async); assert (prof_info->async_queue == prof_info->async); assert (prof_info->src_file == NULL); assert (prof_info->func_name == NULL); @@ -467,9 +470,6 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * { /* Compensate for the missing 'acc_ev_enter_data_end'. */ state += 1; - /* Compensate for the missing 'acc_ev_enqueue_launch_start' and - 'acc_ev_enqueue_launch_end'. */ - state += 2; /* Compensate for the missing 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'. */ state += 2; @@ -482,8 +482,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 9 - || state == 109); + assert (state == 7 + || state == 107); STATE_OP (state, ++); assert (tool_info != NULL); @@ -537,17 +537,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); - assert (state == 5 - || state == 105); - STATE_OP (state, ++); - - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested == NULL); - tool_info->nested = (struct tool_info *) malloc(sizeof *tool_info); - assert (tool_info->nested != NULL); - tool_info->nested->nested = NULL; - assert (prof_info->event_type == acc_ev_enqueue_launch_start); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -591,13 +580,6 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); - - tool_info->nested->event_info.launch_event.event_type = event_info->launch_event.event_type; - tool_info->nested->event_info.launch_event.kernel_name = strdup (event_info->launch_event.kernel_name); - tool_info->nested->event_info.launch_event.num_gangs = event_info->launch_event.num_gangs; - tool_info->nested->event_info.launch_event.num_workers = event_info->launch_event.num_workers; - tool_info->nested->event_info.launch_event.vector_length = event_info->launch_event.vector_length; - event_info->other_event.tool_info = tool_info->nested; } static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *event_info, acc_api_info *api_info) @@ -606,19 +588,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); - assert (state == 6 - || state == 106); - STATE_OP (state, ++); - - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested != NULL); - assert (tool_info->nested->event_info.launch_event.event_type == acc_ev_enqueue_launch_start); - assert (tool_info->nested->event_info.launch_event.kernel_name != NULL); - assert (tool_info->nested->event_info.launch_event.num_gangs >= 1); - assert (tool_info->nested->event_info.launch_event.num_workers >= 1); - assert (tool_info->nested->event_info.launch_event.vector_length >= 1); - assert (prof_info->event_type == acc_ev_enqueue_launch_end); assert (prof_info->valid_bytes == _ACC_PROF_INFO_VALID_BYTES); assert (prof_info->version == _ACC_PROF_INFO_VERSION); @@ -638,12 +607,7 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (event_info->launch_event.valid_bytes == _ACC_LAUNCH_EVENT_INFO_VALID_BYTES); assert (event_info->launch_event.parent_construct == acc_construct_parallel); assert (event_info->launch_event.implicit == 1); - assert (event_info->launch_event.tool_info == tool_info->nested); assert (event_info->launch_event.kernel_name != NULL); - assert (strcmp (event_info->launch_event.kernel_name, tool_info->nested->event_info.launch_event.kernel_name) == 0); - assert (event_info->launch_event.num_gangs == tool_info->nested->event_info.launch_event.num_gangs); - assert (event_info->launch_event.num_workers == tool_info->nested->event_info.launch_event.num_workers); - assert (event_info->launch_event.vector_length == tool_info->nested->event_info.launch_event.vector_length); if (acc_device_type == acc_device_host) assert (api_info->device_api == acc_device_api_none); @@ -657,10 +621,6 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (api_info->device_handle == NULL); assert (api_info->context_handle == NULL); assert (api_info->async_handle == NULL); - - free ((void *) tool_info->nested->event_info.launch_event.kernel_name); - free (tool_info->nested); - tool_info->nested = NULL; } @@ -707,7 +667,7 @@ int main() } assert (state_init == 4); } - assert (state == 10); + assert (state == 8); STATE_OP (state, = 100); @@ -723,7 +683,7 @@ int main() #pragma acc wait assert (state_init == 104); } - assert (state == 110); + assert (state == 108); return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c index 5449723..baa3ac8 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c @@ -22,10 +22,10 @@ main (int argc, char **argv) acc_copyin_async (h, N, async); - memset (h, 0, N); - acc_wait (async); + memset (h, 0, N); + acc_copyout_async (h, N, async + 1); acc_wait (async + 1); |