diff options
author | Thomas Schwinge <thomas@codesourcery.com> | 2023-03-10 18:14:44 +0100 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2023-03-10 18:15:19 +0100 |
commit | c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c (patch) | |
tree | 61e48bad9c54cdc19019f76ed5ac91e87d5eafae | |
parent | ac859ac4df59373db27f2b39ef37a50c51c14e64 (diff) | |
download | gcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.zip gcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.tar.gz gcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.tar.bz2 |
Use 'GOMP_MAP_VARS_TARGET' for OpenACC compute constructs [PR90596]
Thereby considerably simplify the device plugins' 'GOMP_OFFLOAD_openacc_exec',
'GOMP_OFFLOAD_openacc_async_exec' functions: in terms of lines of code, but in
particular conceptually: no more device memory allocation, host to device data
copying, device memory deallocation -- 'GOMP_MAP_VARS_TARGET' does all that for
us.
This depends on commit 2b2340e236c0bba8aaca358ea25a5accd8249fbd
"Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data",
where I said that "a use will emerge later", which is this one here.
PR libgomp/90596
libgomp/
* target.c (gomp_map_vars_internal): Allow for
'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'.
* oacc-parallel.c (GOACC_parallel_keyed): Pass
'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'.
* plugin/plugin-gcn.c (alloc_by_agent, gcn_exec)
(GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
Adjust, simplify.
(gomp_offload_free): Remove.
* plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
(GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify.
(cuda_free_argmem): Remove.
* testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
Adjust.
(cherry picked from commit f8332e52a498df480f72303de32ad0751ad899fe)
-rw-r--r-- | libgomp/ChangeLog.omp | 18 | ||||
-rw-r--r-- | libgomp/oacc-parallel.c | 13 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 47 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 154 | ||||
-rw-r--r-- | libgomp/target.c | 10 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c | 58 |
6 files changed, 62 insertions, 238 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index e3bab14..2b0b4c7 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -3,6 +3,24 @@ Backported from master: 2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + PR libgomp/90596 + * target.c (gomp_map_vars_internal): Allow for + 'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'. + * oacc-parallel.c (GOACC_parallel_keyed): Pass + 'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'. + * plugin/plugin-gcn.c (alloc_by_agent, gcn_exec) + (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): + Adjust, simplify. + (gomp_offload_free): Remove. + * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec) + (GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify. + (cuda_free_argmem): Remove. + * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: + Adjust. + + Backported from master: + 2023-03-10 Thomas Schwinge <thomas@codesourcery.com> + * target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data. diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 9c1db40..9cd99b4 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -274,8 +274,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), va_list ap; struct goacc_thread *thr; struct gomp_device_descr *acc_dev; - struct target_mem_desc *tgt; - void **devaddrs; unsigned int i; struct splay_tree_key_s k; splay_tree_key tgt_fn_key; @@ -468,8 +466,10 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), goacc_aq aq = get_goacc_asyncqueue (async); - tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, - nca_info, true, 0); + struct target_mem_desc *tgt + = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds, + nca_info, true, GOMP_MAP_VARS_TARGET); + free (nca_info); if (profiling_p) @@ -481,10 +481,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *), &api_info); } - devaddrs = gomp_alloca (sizeof (void *) * mapnum); - for (i = 0; i < mapnum; i++) - devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i); - + void **devaddrs = (void **) tgt->tgt_start; if (aq == NULL) acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims, tgt); diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 11b7f25..64694cd 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -1874,13 +1874,6 @@ alloc_by_agent (struct agent_info *agent, size_t size) { GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id); - /* Zero-size allocations are invalid, so in order to return a valid pointer - we need to pass a valid size. One source of zero-size allocations is - kernargs for kernels that have no inputs or outputs (the kernel may - only use console output, for example). */ - if (size == 0) - size = 4; - void *ptr; hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region, size, &ptr); @@ -3048,15 +3041,6 @@ copy_data (void *data_) free (data); } -/* Free device data. This is intended for use as an async callback event. */ - -static void -gomp_offload_free (void *ptr) -{ - GCN_DEBUG ("Async thread ?:?: Freeing %p\n", ptr); - GOMP_OFFLOAD_free (0, ptr); -} - /* Request an asynchronous data copy, to or from a device, on a given queue. The event will be registered as a callback. */ @@ -3219,7 +3203,7 @@ usm_heap_create (size_t size) /* Execute an OpenACC kernel, synchronously or asynchronously. */ static void -gcn_exec (struct kernel_info *kernel, size_t mapnum, +gcn_exec (struct kernel_info *kernel, void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async, struct goacc_asyncqueue *aq) { @@ -3229,11 +3213,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, /* If we get here then this must be an OpenACC kernel. */ kernel->kind = KIND_OPENACC; - /* devaddrs must be double-indirect on the target. */ - void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum); - for (size_t i = 0; i < mapnum; i++) - hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *)); - struct hsa_kernel_description *hsa_kernel_desc = NULL; for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++) { @@ -3345,9 +3324,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, } if (!async) - run_kernel (kernel, ind_da, &kla, NULL, false); + run_kernel (kernel, devaddrs, &kla, NULL, false); else - queue_push_launch (aq, kernel, ind_da, &kla); + queue_push_launch (aq, kernel, devaddrs, &kla); if (profiling_dispatch_p) { @@ -3357,16 +3336,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum, &enqueue_launch_event_info, api_info); } - - if (!async) - gomp_offload_free (ind_da); - else - { - if (DEBUG_QUEUES) - GCN_DEBUG ("queue_push_callback %d:%d gomp_offload_free, %p\n", - aq->agent->device_id, aq->id, ind_da); - queue_push_callback (aq, gomp_offload_free, ind_da); - } } /* }}} */ @@ -4095,14 +4064,15 @@ GOMP_OFFLOAD_is_usm_ptr (void *ptr) already-loaded KERNEL. */ void -GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum, +GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), + size_t mapnum __attribute__((unused)), void **hostaddrs __attribute__((unused)), void **devaddrs, unsigned *dims, void *targ_mem_desc) { struct kernel_info *kernel = (struct kernel_info *) fn_ptr; - gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL); + gcn_exec (kernel, devaddrs, dims, targ_mem_desc, false, NULL); } /* Run an asynchronous OpenACC kernel on the specified queue. */ @@ -4116,7 +4086,8 @@ GOMP_OFFLOAD_openacc_exec_params (void (*fn_ptr) (void *), size_t mapnum, } void -GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum, +GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), + size_t mapnum __attribute__((unused)), void **hostaddrs __attribute__((unused)), void **devaddrs, unsigned *dims, void *targ_mem_desc, @@ -4124,7 +4095,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum, { struct kernel_info *kernel = (struct kernel_info *) fn_ptr; - gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq); + gcn_exec (kernel, devaddrs, dims, targ_mem_desc, true, aq); } /* Create a new asynchronous thread and queue for running future kernels. */ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1c05659..6ade34b 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -773,7 +773,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, } static void -nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc, +nvptx_exec (void (*fn), unsigned *dims, void *targ_mem_desc, CUdeviceptr dp, CUstream stream) { struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; @@ -1740,70 +1740,16 @@ GOMP_OFFLOAD_page_locked_host_free (void *ptr) void -GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, +GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), + size_t mapnum __attribute__((unused)), void **hostaddrs __attribute__((unused)), void **devaddrs, unsigned *dims, void *targ_mem_desc) { - GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); + GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__); - struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); - acc_prof_info *prof_info = thr->prof_info; - acc_event_info data_event_info; - acc_api_info *api_info = thr->api_info; - bool profiling_p = __builtin_expect (prof_info != NULL, false); - - void **hp = NULL; - CUdeviceptr dp = 0; - - if (mapnum > 0) - { - size_t s = mapnum * sizeof (void *); - hp = alloca (s); - for (int i = 0; i < mapnum; i++) - hp[i] = devaddrs[i]; - CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); - if (profiling_p) - goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); - } - - /* Copy the (device) pointers to arguments to the device (dp and hp might in - fact have the same value on a unified-memory system). */ - if (mapnum > 0) - { - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_start; - - data_event_info.data_event.event_type = prof_info->event_type; - data_event_info.data_event.valid_bytes - = _ACC_DATA_EVENT_INFO_VALID_BYTES; - data_event_info.data_event.parent_construct - = acc_construct_parallel; - data_event_info.data_event.implicit = 1; /* Always implicit. */ - data_event_info.data_event.tool_info = NULL; - data_event_info.data_event.var_name = NULL; - data_event_info.data_event.bytes = mapnum * sizeof (void *); - data_event_info.data_event.host_ptr = hp; - data_event_info.data_event.device_ptr = (const void *) dp; - - api_info->device_api = acc_device_api_cuda; - - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, (void *) hp, - mapnum * sizeof (void *)); - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_end; - data_event_info.data_event.event_type = prof_info->event_type; - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - } - - nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL); + CUdeviceptr dp = (CUdeviceptr) devaddrs; + nvptx_exec (fn, dims, targ_mem_desc, dp, NULL); CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL); const char *maybe_abort_msg = "(perhaps abort was called)"; @@ -1812,98 +1758,20 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, maybe_abort_msg); else if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r)); - - CUDA_CALL_ASSERT (cuMemFree, dp); - if (profiling_p) - goacc_profiling_acc_ev_free (thr, (void *) dp); -} - -static void -cuda_free_argmem (void *ptr) -{ - void **block = (void **) ptr; - nvptx_free (block[0], (struct ptx_device *) block[1]); - free (block); } void -GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum, +GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), + size_t mapnum __attribute__((unused)), void **hostaddrs __attribute__((unused)), void **devaddrs, unsigned *dims, void *targ_mem_desc, struct goacc_asyncqueue *aq) { - GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); - - struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread (); - acc_prof_info *prof_info = thr->prof_info; - acc_event_info data_event_info; - acc_api_info *api_info = thr->api_info; - bool profiling_p = __builtin_expect (prof_info != NULL, false); - - void **hp = NULL; - CUdeviceptr dp = 0; - void **block = NULL; - - if (mapnum > 0) - { - size_t s = mapnum * sizeof (void *); - block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s); - hp = block + 2; - for (int i = 0; i < mapnum; i++) - hp[i] = devaddrs[i]; - CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); - if (profiling_p) - goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); - } - - /* Copy the (device) pointers to arguments to the device (dp and hp might in - fact have the same value on a unified-memory system). */ - if (mapnum > 0) - { - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_start; - - data_event_info.data_event.event_type = prof_info->event_type; - data_event_info.data_event.valid_bytes - = _ACC_DATA_EVENT_INFO_VALID_BYTES; - data_event_info.data_event.parent_construct - = acc_construct_parallel; - data_event_info.data_event.implicit = 1; /* Always implicit. */ - data_event_info.data_event.tool_info = NULL; - data_event_info.data_event.var_name = NULL; - data_event_info.data_event.bytes = mapnum * sizeof (void *); - data_event_info.data_event.host_ptr = hp; - data_event_info.data_event.device_ptr = (const void *) dp; - - api_info->device_api = acc_device_api_cuda; - - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - - CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, dp, (void *) hp, - mapnum * sizeof (void *), aq->cuda_stream); - block[0] = (void *) dp; - - struct nvptx_thread *nvthd = - (struct nvptx_thread *) GOMP_PLUGIN_acc_thread (); - block[1] = (void *) nvthd->ptx_dev; - - if (profiling_p) - { - prof_info->event_type = acc_ev_enqueue_upload_end; - data_event_info.data_event.event_type = prof_info->event_type; - GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info, - api_info); - } - } - - nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream); + GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__); - if (mapnum > 0) - GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block); + CUdeviceptr dp = (CUdeviceptr) devaddrs; + nvptx_exec (fn, dims, targ_mem_desc, dp, aq->cuda_stream); } void * diff --git a/libgomp/target.c b/libgomp/target.c index 60f9b43..96ece0b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -996,13 +996,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, cbuf.chunk_cnt = -1; cbuf.use_cnt = 0; cbuf.buf = NULL; - if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) + if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET)) { size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); cbuf.chunk_cnt = 0; } - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) { size_t align = 4 * sizeof (void *); tgt_align = align; @@ -1346,7 +1346,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_end = tgt->tgt_start + sizes[0]; } - else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) + else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET)) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -1384,7 +1384,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } tgt_size = 0; - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) tgt_size = mapnum * sizeof (void *); tgt->array = NULL; @@ -1944,7 +1944,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, } } - if (pragma_kind == GOMP_MAP_VARS_TARGET) + if (pragma_kind & GOMP_MAP_VARS_TARGET) { for (i = 0; i < mapnum; i++) { diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c index 711ead5..27f86d3 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c @@ -203,9 +203,7 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_ # error TODO #else assert (state == 4 - || state == 6 - || state == 104 - || state == 106); + || state == 104); STATE_OP (state, ++); if (state == 5 @@ -217,13 +215,6 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_ assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start); assert (tool_info->nested->nested == NULL); } - else if (state == 7 - || state == 107) - { - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested == NULL); - } else abort (); #endif @@ -268,20 +259,13 @@ static void cb_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_a #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT # error TODO #else - assert (state == 9 - || state == 11); + assert (state == 9); STATE_OP (state, ++); if (state == 10) { assert (tool_info != NULL); assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); - assert (tool_info->nested == NULL); - } - else if (state == 12) - { - assert (tool_info != NULL); - assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start); assert (tool_info->nested != NULL); assert (tool_info->nested->event_info.other_event.event_type == acc_ev_exit_data_start); assert (tool_info->nested->nested == NULL); @@ -449,19 +433,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_ { DEBUG_printf ("%s\n", __FUNCTION__); + assert (state == 8 #if ASYNC_EXIT_DATA - if (acc_async != acc_async_sync) - { - /* Compensate for the deferred 'acc_ev_free'. */ - state += 1; - } -#else -# error TODO -#endif - - assert (state == 10 -#if ASYNC_EXIT_DATA - || state == 110 + || state == 108 #endif ); STATE_OP (state, ++); @@ -525,9 +499,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in { DEBUG_printf ("%s\n", __FUNCTION__); - assert (state == 12 + assert (state == 10 #if ASYNC_EXIT_DATA - || state == 112 + || state == 110 #endif ); STATE_OP (state, ++); @@ -654,13 +628,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * { /* Compensate for the missing 'acc_ev_enter_data_end'. */ state += 1; - /* Compensate for the missing 'acc_ev_alloc'. */ - state += 1; /* Compensate for the missing 'acc_ev_enqueue_launch_start' and 'acc_ev_enqueue_launch_end'. */ state += 2; - /* Compensate for the missing 'acc_ev_free'. */ - state += 1; /* Compensate for the missing 'acc_ev_exit_data_start'. */ state += 1; /* Compensate for the missing 'acc_ev_free'. */ @@ -676,8 +646,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info * state += 2; } #endif - assert (state == 13 - || state == 113); + assert (state == 11 + || state == 111); STATE_OP (state, ++); assert (tool_info != NULL); @@ -731,8 +701,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e assert (acc_device_type != acc_device_host); - assert (state == 7 - || state == 107); + assert (state == 6 + || state == 106); STATE_OP (state, ++); assert (tool_info != NULL); @@ -800,8 +770,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve assert (acc_device_type != acc_device_host); - assert (state == 8 - || state == 108); + assert (state == 7 + || state == 107); STATE_OP (state, ++); assert (tool_info != NULL); @@ -889,7 +859,7 @@ int main() } assert (state_init == 5); } - assert (state == 14); + assert (state == 12); STATE_OP (state, = 100); @@ -906,7 +876,7 @@ int main() #pragma acc wait assert (state_init == 105); } - assert (state == 114); + assert (state == 112); return 0; } |