aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Schwinge <thomas@codesourcery.com>2023-03-10 18:14:44 +0100
committerThomas Schwinge <thomas@codesourcery.com>2023-03-10 18:15:19 +0100
commitc64e8a52af5f05d03b6d68e2fe6deb60f71adc6c (patch)
tree61e48bad9c54cdc19019f76ed5ac91e87d5eafae
parentac859ac4df59373db27f2b39ef37a50c51c14e64 (diff)
downloadgcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.zip
gcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.tar.gz
gcc-c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c.tar.bz2
Use 'GOMP_MAP_VARS_TARGET' for OpenACC compute constructs [PR90596]
Thereby considerably simplify the device plugins' 'GOMP_OFFLOAD_openacc_exec', 'GOMP_OFFLOAD_openacc_async_exec' functions: in terms of lines of code, but in particular conceptually: no more device memory allocation, host to device data copying, device memory deallocation -- 'GOMP_MAP_VARS_TARGET' does all that for us. This depends on commit 2b2340e236c0bba8aaca358ea25a5accd8249fbd "Allow libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral' data", where I said that "a use will emerge later", which is this one here. PR libgomp/90596 libgomp/ * target.c (gomp_map_vars_internal): Allow for 'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'. * oacc-parallel.c (GOACC_parallel_keyed): Pass 'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'. * plugin/plugin-gcn.c (alloc_by_agent, gcn_exec) (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify. (gomp_offload_free): Remove. * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec) (GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify. (cuda_free_argmem): Remove. * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c: Adjust. (cherry picked from commit f8332e52a498df480f72303de32ad0751ad899fe)
-rw-r--r--libgomp/ChangeLog.omp18
-rw-r--r--libgomp/oacc-parallel.c13
-rw-r--r--libgomp/plugin/plugin-gcn.c47
-rw-r--r--libgomp/plugin/plugin-nvptx.c154
-rw-r--r--libgomp/target.c10
-rw-r--r--libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c58
6 files changed, 62 insertions, 238 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index e3bab14..2b0b4c7 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -3,6 +3,24 @@
Backported from master:
2023-03-10 Thomas Schwinge <thomas@codesourcery.com>
+ PR libgomp/90596
+ * target.c (gomp_map_vars_internal): Allow for
+ 'param_kind == GOMP_MAP_VARS_OPENACC | GOMP_MAP_VARS_TARGET'.
+ * oacc-parallel.c (GOACC_parallel_keyed): Pass
+ 'GOMP_MAP_VARS_TARGET' to 'goacc_map_vars'.
+ * plugin/plugin-gcn.c (alloc_by_agent, gcn_exec)
+ (GOMP_OFFLOAD_openacc_exec, GOMP_OFFLOAD_openacc_async_exec):
+ Adjust, simplify.
+ (gomp_offload_free): Remove.
+ * plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
+ (GOMP_OFFLOAD_openacc_async_exec): Adjust, simplify.
+ (cuda_free_argmem): Remove.
+ * testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c:
+ Adjust.
+
+ Backported from master:
+ 2023-03-10 Thomas Schwinge <thomas@codesourcery.com>
+
* target.c (gomp_copy_host2dev, gomp_map_vars_internal): Allow
libgomp 'cbuf' buffering with OpenACC 'async' for 'ephemeral'
data.
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 9c1db40..9cd99b4 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -274,8 +274,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
va_list ap;
struct goacc_thread *thr;
struct gomp_device_descr *acc_dev;
- struct target_mem_desc *tgt;
- void **devaddrs;
unsigned int i;
struct splay_tree_key_s k;
splay_tree_key tgt_fn_key;
@@ -468,8 +466,10 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
goacc_aq aq = get_goacc_asyncqueue (async);
- tgt = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
- nca_info, true, 0);
+ struct target_mem_desc *tgt
+ = goacc_map_vars (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, kinds,
+ nca_info, true, GOMP_MAP_VARS_TARGET);
+
free (nca_info);
if (profiling_p)
@@ -481,10 +481,7 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
&api_info);
}
- devaddrs = gomp_alloca (sizeof (void *) * mapnum);
- for (i = 0; i < mapnum; i++)
- devaddrs[i] = (void *) gomp_map_val (tgt, hostaddrs, i);
-
+ void **devaddrs = (void **) tgt->tgt_start;
if (aq == NULL)
acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs, dims,
tgt);
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 11b7f25..64694cd 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -1874,13 +1874,6 @@ alloc_by_agent (struct agent_info *agent, size_t size)
{
GCN_DEBUG ("Allocating %zu bytes on device %d\n", size, agent->device_id);
- /* Zero-size allocations are invalid, so in order to return a valid pointer
- we need to pass a valid size. One source of zero-size allocations is
- kernargs for kernels that have no inputs or outputs (the kernel may
- only use console output, for example). */
- if (size == 0)
- size = 4;
-
void *ptr;
hsa_status_t status = hsa_fns.hsa_memory_allocate_fn (agent->data_region,
size, &ptr);
@@ -3048,15 +3041,6 @@ copy_data (void *data_)
free (data);
}
-/* Free device data. This is intended for use as an async callback event. */
-
-static void
-gomp_offload_free (void *ptr)
-{
- GCN_DEBUG ("Async thread ?:?: Freeing %p\n", ptr);
- GOMP_OFFLOAD_free (0, ptr);
-}
-
/* Request an asynchronous data copy, to or from a device, on a given queue.
The event will be registered as a callback. */
@@ -3219,7 +3203,7 @@ usm_heap_create (size_t size)
/* Execute an OpenACC kernel, synchronously or asynchronously. */
static void
-gcn_exec (struct kernel_info *kernel, size_t mapnum,
+gcn_exec (struct kernel_info *kernel,
void **devaddrs, unsigned *dims, void *targ_mem_desc, bool async,
struct goacc_asyncqueue *aq)
{
@@ -3229,11 +3213,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
/* If we get here then this must be an OpenACC kernel. */
kernel->kind = KIND_OPENACC;
- /* devaddrs must be double-indirect on the target. */
- void **ind_da = alloc_by_agent (kernel->agent, sizeof (void*) * mapnum);
- for (size_t i = 0; i < mapnum; i++)
- hsa_fns.hsa_memory_copy_fn (&ind_da[i], &devaddrs[i], sizeof (void *));
-
struct hsa_kernel_description *hsa_kernel_desc = NULL;
for (unsigned i = 0; i < kernel->module->image_desc->kernel_count; i++)
{
@@ -3345,9 +3324,9 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
}
if (!async)
- run_kernel (kernel, ind_da, &kla, NULL, false);
+ run_kernel (kernel, devaddrs, &kla, NULL, false);
else
- queue_push_launch (aq, kernel, ind_da, &kla);
+ queue_push_launch (aq, kernel, devaddrs, &kla);
if (profiling_dispatch_p)
{
@@ -3357,16 +3336,6 @@ gcn_exec (struct kernel_info *kernel, size_t mapnum,
&enqueue_launch_event_info,
api_info);
}
-
- if (!async)
- gomp_offload_free (ind_da);
- else
- {
- if (DEBUG_QUEUES)
- GCN_DEBUG ("queue_push_callback %d:%d gomp_offload_free, %p\n",
- aq->agent->device_id, aq->id, ind_da);
- queue_push_callback (aq, gomp_offload_free, ind_da);
- }
}
/* }}} */
@@ -4095,14 +4064,15 @@ GOMP_OFFLOAD_is_usm_ptr (void *ptr)
already-loaded KERNEL. */
void
-GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_exec (void (*fn_ptr) (void *),
+ size_t mapnum __attribute__((unused)),
void **hostaddrs __attribute__((unused)),
void **devaddrs, unsigned *dims,
void *targ_mem_desc)
{
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, false, NULL);
+ gcn_exec (kernel, devaddrs, dims, targ_mem_desc, false, NULL);
}
/* Run an asynchronous OpenACC kernel on the specified queue. */
@@ -4116,7 +4086,8 @@ GOMP_OFFLOAD_openacc_exec_params (void (*fn_ptr) (void *), size_t mapnum,
}
void
-GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *),
+ size_t mapnum __attribute__((unused)),
void **hostaddrs __attribute__((unused)),
void **devaddrs,
unsigned *dims, void *targ_mem_desc,
@@ -4124,7 +4095,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn_ptr) (void *), size_t mapnum,
{
struct kernel_info *kernel = (struct kernel_info *) fn_ptr;
- gcn_exec (kernel, mapnum, devaddrs, dims, targ_mem_desc, true, aq);
+ gcn_exec (kernel, devaddrs, dims, targ_mem_desc, true, aq);
}
/* Create a new asynchronous thread and queue for running future kernels. */
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 1c05659..6ade34b 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -773,7 +773,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
}
static void
-nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc,
+nvptx_exec (void (*fn), unsigned *dims, void *targ_mem_desc,
CUdeviceptr dp, CUstream stream)
{
struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn;
@@ -1740,70 +1740,16 @@ GOMP_OFFLOAD_page_locked_host_free (void *ptr)
void
-GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_exec (void (*fn) (void *),
+ size_t mapnum __attribute__((unused)),
void **hostaddrs __attribute__((unused)),
void **devaddrs,
unsigned *dims, void *targ_mem_desc)
{
- GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
+ GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
- struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
- acc_prof_info *prof_info = thr->prof_info;
- acc_event_info data_event_info;
- acc_api_info *api_info = thr->api_info;
- bool profiling_p = __builtin_expect (prof_info != NULL, false);
-
- void **hp = NULL;
- CUdeviceptr dp = 0;
-
- if (mapnum > 0)
- {
- size_t s = mapnum * sizeof (void *);
- hp = alloca (s);
- for (int i = 0; i < mapnum; i++)
- hp[i] = devaddrs[i];
- CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
- if (profiling_p)
- goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
- }
-
- /* Copy the (device) pointers to arguments to the device (dp and hp might in
- fact have the same value on a unified-memory system). */
- if (mapnum > 0)
- {
- if (profiling_p)
- {
- prof_info->event_type = acc_ev_enqueue_upload_start;
-
- data_event_info.data_event.event_type = prof_info->event_type;
- data_event_info.data_event.valid_bytes
- = _ACC_DATA_EVENT_INFO_VALID_BYTES;
- data_event_info.data_event.parent_construct
- = acc_construct_parallel;
- data_event_info.data_event.implicit = 1; /* Always implicit. */
- data_event_info.data_event.tool_info = NULL;
- data_event_info.data_event.var_name = NULL;
- data_event_info.data_event.bytes = mapnum * sizeof (void *);
- data_event_info.data_event.host_ptr = hp;
- data_event_info.data_event.device_ptr = (const void *) dp;
-
- api_info->device_api = acc_device_api_cuda;
-
- GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
- api_info);
- }
- CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, (void *) hp,
- mapnum * sizeof (void *));
- if (profiling_p)
- {
- prof_info->event_type = acc_ev_enqueue_upload_end;
- data_event_info.data_event.event_type = prof_info->event_type;
- GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
- api_info);
- }
- }
-
- nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL);
+ CUdeviceptr dp = (CUdeviceptr) devaddrs;
+ nvptx_exec (fn, dims, targ_mem_desc, dp, NULL);
CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL);
const char *maybe_abort_msg = "(perhaps abort was called)";
@@ -1812,98 +1758,20 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum,
maybe_abort_msg);
else if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuStreamSynchronize error: %s", cuda_error (r));
-
- CUDA_CALL_ASSERT (cuMemFree, dp);
- if (profiling_p)
- goacc_profiling_acc_ev_free (thr, (void *) dp);
-}
-
-static void
-cuda_free_argmem (void *ptr)
-{
- void **block = (void **) ptr;
- nvptx_free (block[0], (struct ptx_device *) block[1]);
- free (block);
}
void
-GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum,
+GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *),
+ size_t mapnum __attribute__((unused)),
void **hostaddrs __attribute__((unused)),
void **devaddrs,
unsigned *dims, void *targ_mem_desc,
struct goacc_asyncqueue *aq)
{
- GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__);
-
- struct goacc_thread *thr = GOMP_PLUGIN_goacc_thread ();
- acc_prof_info *prof_info = thr->prof_info;
- acc_event_info data_event_info;
- acc_api_info *api_info = thr->api_info;
- bool profiling_p = __builtin_expect (prof_info != NULL, false);
-
- void **hp = NULL;
- CUdeviceptr dp = 0;
- void **block = NULL;
-
- if (mapnum > 0)
- {
- size_t s = mapnum * sizeof (void *);
- block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s);
- hp = block + 2;
- for (int i = 0; i < mapnum; i++)
- hp[i] = devaddrs[i];
- CUDA_CALL_ASSERT (cuMemAlloc, &dp, s);
- if (profiling_p)
- goacc_profiling_acc_ev_alloc (thr, (void *) dp, s);
- }
-
- /* Copy the (device) pointers to arguments to the device (dp and hp might in
- fact have the same value on a unified-memory system). */
- if (mapnum > 0)
- {
- if (profiling_p)
- {
- prof_info->event_type = acc_ev_enqueue_upload_start;
-
- data_event_info.data_event.event_type = prof_info->event_type;
- data_event_info.data_event.valid_bytes
- = _ACC_DATA_EVENT_INFO_VALID_BYTES;
- data_event_info.data_event.parent_construct
- = acc_construct_parallel;
- data_event_info.data_event.implicit = 1; /* Always implicit. */
- data_event_info.data_event.tool_info = NULL;
- data_event_info.data_event.var_name = NULL;
- data_event_info.data_event.bytes = mapnum * sizeof (void *);
- data_event_info.data_event.host_ptr = hp;
- data_event_info.data_event.device_ptr = (const void *) dp;
-
- api_info->device_api = acc_device_api_cuda;
-
- GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
- api_info);
- }
-
- CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, dp, (void *) hp,
- mapnum * sizeof (void *), aq->cuda_stream);
- block[0] = (void *) dp;
-
- struct nvptx_thread *nvthd =
- (struct nvptx_thread *) GOMP_PLUGIN_acc_thread ();
- block[1] = (void *) nvthd->ptx_dev;
-
- if (profiling_p)
- {
- prof_info->event_type = acc_ev_enqueue_upload_end;
- data_event_info.data_event.event_type = prof_info->event_type;
- GOMP_PLUGIN_goacc_profiling_dispatch (prof_info, &data_event_info,
- api_info);
- }
- }
-
- nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream);
+ GOMP_PLUGIN_debug (0, "nvptx %s\n", __FUNCTION__);
- if (mapnum > 0)
- GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block);
+ CUdeviceptr dp = (CUdeviceptr) devaddrs;
+ nvptx_exec (fn, dims, targ_mem_desc, dp, aq->cuda_stream);
}
void *
diff --git a/libgomp/target.c b/libgomp/target.c
index 60f9b43..96ece0b 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -996,13 +996,13 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
cbuf.chunk_cnt = -1;
cbuf.use_cnt = 0;
cbuf.buf = NULL;
- if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
+ if (mapnum > 1 || (pragma_kind & GOMP_MAP_VARS_TARGET))
{
size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
cbuf.chunk_cnt = 0;
}
- if (pragma_kind == GOMP_MAP_VARS_TARGET)
+ if (pragma_kind & GOMP_MAP_VARS_TARGET)
{
size_t align = 4 * sizeof (void *);
tgt_align = align;
@@ -1346,7 +1346,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
tgt->tgt_start = (uintptr_t) tgt->to_free;
tgt->tgt_end = tgt->tgt_start + sizes[0];
}
- else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
+ else if (not_found_cnt || (pragma_kind & GOMP_MAP_VARS_TARGET))
{
/* Allocate tgt_align aligned tgt_size block of memory. */
/* FIXME: Perhaps change interface to allocate properly aligned
@@ -1384,7 +1384,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
}
tgt_size = 0;
- if (pragma_kind == GOMP_MAP_VARS_TARGET)
+ if (pragma_kind & GOMP_MAP_VARS_TARGET)
tgt_size = mapnum * sizeof (void *);
tgt->array = NULL;
@@ -1944,7 +1944,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep,
}
}
- if (pragma_kind == GOMP_MAP_VARS_TARGET)
+ if (pragma_kind & GOMP_MAP_VARS_TARGET)
{
for (i = 0; i < mapnum; i++)
{
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
index 711ead5..27f86d3 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/acc_prof-parallel-1.c
@@ -203,9 +203,7 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
# error TODO
#else
assert (state == 4
- || state == 6
- || state == 104
- || state == 106);
+ || state == 104);
STATE_OP (state, ++);
if (state == 5
@@ -217,13 +215,6 @@ static void cb_alloc (acc_prof_info *prof_info, acc_event_info *event_info, acc_
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_enter_data_start);
assert (tool_info->nested->nested == NULL);
}
- else if (state == 7
- || state == 107)
- {
- assert (tool_info != NULL);
- assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
- assert (tool_info->nested == NULL);
- }
else
abort ();
#endif
@@ -268,20 +259,13 @@ static void cb_free (acc_prof_info *prof_info, acc_event_info *event_info, acc_a
#if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
# error TODO
#else
- assert (state == 9
- || state == 11);
+ assert (state == 9);
STATE_OP (state, ++);
if (state == 10)
{
assert (tool_info != NULL);
assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
- assert (tool_info->nested == NULL);
- }
- else if (state == 12)
- {
- assert (tool_info != NULL);
- assert (tool_info->event_info.other_event.event_type == acc_ev_compute_construct_start);
assert (tool_info->nested != NULL);
assert (tool_info->nested->event_info.other_event.event_type == acc_ev_exit_data_start);
assert (tool_info->nested->nested == NULL);
@@ -449,19 +433,9 @@ static void cb_exit_data_start (acc_prof_info *prof_info, acc_event_info *event_
{
DEBUG_printf ("%s\n", __FUNCTION__);
+ assert (state == 8
#if ASYNC_EXIT_DATA
- if (acc_async != acc_async_sync)
- {
- /* Compensate for the deferred 'acc_ev_free'. */
- state += 1;
- }
-#else
-# error TODO
-#endif
-
- assert (state == 10
-#if ASYNC_EXIT_DATA
- || state == 110
+ || state == 108
#endif
);
STATE_OP (state, ++);
@@ -525,9 +499,9 @@ static void cb_exit_data_end (acc_prof_info *prof_info, acc_event_info *event_in
{
DEBUG_printf ("%s\n", __FUNCTION__);
- assert (state == 12
+ assert (state == 10
#if ASYNC_EXIT_DATA
- || state == 112
+ || state == 110
#endif
);
STATE_OP (state, ++);
@@ -654,13 +628,9 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
{
/* Compensate for the missing 'acc_ev_enter_data_end'. */
state += 1;
- /* Compensate for the missing 'acc_ev_alloc'. */
- state += 1;
/* Compensate for the missing 'acc_ev_enqueue_launch_start' and
'acc_ev_enqueue_launch_end'. */
state += 2;
- /* Compensate for the missing 'acc_ev_free'. */
- state += 1;
/* Compensate for the missing 'acc_ev_exit_data_start'. */
state += 1;
/* Compensate for the missing 'acc_ev_free'. */
@@ -676,8 +646,8 @@ static void cb_compute_construct_end (acc_prof_info *prof_info, acc_event_info *
state += 2;
}
#endif
- assert (state == 13
- || state == 113);
+ assert (state == 11
+ || state == 111);
STATE_OP (state, ++);
assert (tool_info != NULL);
@@ -731,8 +701,8 @@ static void cb_enqueue_launch_start (acc_prof_info *prof_info, acc_event_info *e
assert (acc_device_type != acc_device_host);
- assert (state == 7
- || state == 107);
+ assert (state == 6
+ || state == 106);
STATE_OP (state, ++);
assert (tool_info != NULL);
@@ -800,8 +770,8 @@ static void cb_enqueue_launch_end (acc_prof_info *prof_info, acc_event_info *eve
assert (acc_device_type != acc_device_host);
- assert (state == 8
- || state == 108);
+ assert (state == 7
+ || state == 107);
STATE_OP (state, ++);
assert (tool_info != NULL);
@@ -889,7 +859,7 @@ int main()
}
assert (state_init == 5);
}
- assert (state == 14);
+ assert (state == 12);
STATE_OP (state, = 100);
@@ -906,7 +876,7 @@ int main()
#pragma acc wait
assert (state_init == 105);
}
- assert (state == 114);
+ assert (state == 112);
return 0;
}