diff options
author | Julian Brown <julian@codesourcery.com> | 2019-09-11 13:22:03 -0700 |
---|---|---|
committer | Thomas Schwinge <thomas@codesourcery.com> | 2020-03-03 12:51:25 +0100 |
commit | c03050895c360ad34a2c13a2cefe1a0c7e12b6b9 (patch) | |
tree | 8cdb6f3f2f9eeaf7b023218485f99f9983d51fc8 | |
parent | 3e9e8cccbbbf64fa929d06e5629680fb2bb41b9a (diff) | |
download | gcc-c03050895c360ad34a2c13a2cefe1a0c7e12b6b9.zip gcc-c03050895c360ad34a2c13a2cefe1a0c7e12b6b9.tar.gz gcc-c03050895c360ad34a2c13a2cefe1a0c7e12b6b9.tar.bz2 |
[og9] Fix OpenACC "ephemeral" asynchronous host-to-device copies
libgomp/
* libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update
prototype.
* libgomp.h (gomp_copy_host2dev): Update prototype.
* oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter.
* oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev.
(update_dev_host): Likewise.
* oacc-parallel.c (GOACC_enter_exit_data): Call async versions of
acc_attach/acc_detach/acc_detach_finalize functions.
* plugin/plugin-gcn.c (wait_for_queue_nonfull): Don't lock/unlock
aq->mutex here.
(queue_push_launch): Lock aq->mutex before calling
wait_for_queue_nonfull.
(queue_push_callback): Likewise.
(queue_push_asyncwait): Likewise.
(queue_push_placeholder): Likewise.
(GOMP_OFFLOAD_openacc_async_host2dev): Add ephemeral parameter. Copy
source data to temporary space immediately if true, and pass to
queue_push_copy.
(goacc_device_copy_async): Remove.
(gomp_copy_host2dev): Add ephemeral parameter. Update function comment.
Call async host2dev plugin hook directly.
(gomp_copy_dev2host): Call async dev2host plugin hook directly.
(gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer,
gomp_detach_pointer): Update calls to gomp_copy_host2dev.
(gomp_map_vars_internal): Don't use coalescing buffer for asynchronous
copies. Update calls to gomp_copy_host2dev.
(gomp_update): Update calls to gomp_copy_host2dev.
* testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix
async-safety issue. Increase number of iterations.
* testsuite/libgomp.oacc-fortran/lib-16-2.f90: Fix async-safety issue.
(cherry picked from openacc-gcc-9-branch commit
3a25e449d04d5768c3a799264ba0e5cab8ae181f)
-rw-r--r-- | libgomp/ChangeLog.omp | 33 | ||||
-rw-r--r-- | libgomp/libgomp-plugin.h | 3 | ||||
-rw-r--r-- | libgomp/libgomp.h | 2 | ||||
-rw-r--r-- | libgomp/oacc-host.c | 1 | ||||
-rw-r--r-- | libgomp/oacc-mem.c | 4 | ||||
-rw-r--r-- | libgomp/oacc-parallel.c | 10 | ||||
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 43 | ||||
-rw-r--r-- | libgomp/target.c | 101 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c | 18 | ||||
-rw-r--r-- | libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 | 5 |
10 files changed, 133 insertions, 87 deletions
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index 5f39fae..1006b81 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,5 +1,38 @@ 2019-09-17 Julian Brown <julian@codesourcery.com> + * libgomp-plugin.h (GOMP_OFFLOAD_openacc_async_host2dev): Update + prototype. + * libgomp.h (gomp_copy_host2dev): Update prototype. + * oacc-host.c (host_openacc_async_host2dev): Add ephemeral parameter. + * oacc-mem.c (memcpy_tofrom_device): Update call to gomp_copy_host2dev. + (update_dev_host): Likewise. + * oacc-parallel.c (GOACC_enter_exit_data): Call async versions of + acc_attach/acc_detach/acc_detach_finalize functions. + * plugin/plugin-gcn.c (wait_for_queue_nonfull): Don't lock/unlock + aq->mutex here. + (queue_push_launch): Lock aq->mutex before calling + wait_for_queue_nonfull. + (queue_push_callback): Likewise. + (queue_push_asyncwait): Likewise. + (queue_push_placeholder): Likewise. + (GOMP_OFFLOAD_openacc_async_host2dev): Add ephemeral parameter. Copy + source data to temporary space immediately if true, and pass to + queue_push_copy. + (goacc_device_copy_async): Remove. + (gomp_copy_host2dev): Add ephemeral parameter. Update function comment. + Call async host2dev plugin hook directly. + (gomp_copy_dev2host): Call async dev2host plugin hook directly. + (gomp_map_vars_existing, gomp_map_pointer, gomp_attach_pointer, + gomp_detach_pointer): Update calls to gomp_copy_host2dev. + (gomp_map_vars_internal): Don't use coalescing buffer for asynchronous + copies. Update calls to gomp_copy_host2dev. + (gomp_update): Update calls to gomp_copy_host2dev. + * testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c (main): Fix + async-safety issue. Increase number of iterations. + * testsuite/libgomp.oacc-fortran/lib-16-2.f90: Fix async-safety issue. + +2019-09-17 Julian Brown <julian@codesourcery.com> + * oacc-host.c (host_openacc_async_queue_callback): Invoke callback function immediately. * oacc-parallel.c (struct async_prof_callback_info, async_prof_dispatch, diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index bd63c42..fcd4727 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -141,7 +141,8 @@ extern void GOMP_OFFLOAD_openacc_async_exec_params (void (*) (void *), size_t, struct goacc_asyncqueue *); extern bool GOMP_OFFLOAD_openacc_async_dev2host (int, void *, const void *, size_t, struct goacc_asyncqueue *); -extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, size_t, +extern bool GOMP_OFFLOAD_openacc_async_host2dev (int, void *, const void *, + size_t, bool, struct goacc_asyncqueue *); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_device (void); extern void *GOMP_OFFLOAD_openacc_cuda_get_current_context (void); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 803f72d..ab216a3 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1120,7 +1120,7 @@ extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, struct gomp_coalesce_buf; extern void gomp_copy_host2dev (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, - size_t, struct gomp_coalesce_buf *); + size_t, bool, struct gomp_coalesce_buf *); extern void gomp_copy_dev2host (struct gomp_device_descr *, struct goacc_asyncqueue *, void *, const void *, size_t); diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 0231b59..4bc2eeb 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -230,6 +230,7 @@ host_openacc_async_host2dev (int ord __attribute__ ((unused)), void *dst __attribute__ ((unused)), const void *src __attribute__ ((unused)), size_t n __attribute__ ((unused)), + bool eph __attribute__ ((unused)), struct goacc_asyncqueue *aq __attribute__ ((unused))) { diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index c07a5eb..f8c71bf 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -203,7 +203,7 @@ memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, if (from) gomp_copy_dev2host (thr->dev, aq, h, d, s); else - gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (thr->dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); if (profiling_p) { @@ -819,7 +819,7 @@ update_dev_host (int is_dev, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); if (is_dev) - gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL); + gomp_copy_host2dev (acc_dev, aq, d, h, s, false, /* TODO: cbuf? */ NULL); else gomp_copy_dev2host (acc_dev, aq, h, d, s); diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 0c9cb3c..a3ec0ed 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -1022,7 +1022,7 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, if (!pointer) { if (kind == GOMP_MAP_ATTACH) - acc_attach (hostaddrs[i]); + acc_attach_async (hostaddrs[i], async); else if (kind == GOMP_MAP_STRUCT) i += sizes[i]; } @@ -1042,9 +1042,9 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, if (!pointer) { if (kind == GOMP_MAP_DETACH) - acc_detach (hostaddrs[i]); + acc_detach_async (hostaddrs[i], async); else if (kind == GOMP_MAP_FORCE_DETACH) - acc_detach_finalize (hostaddrs[i]); + acc_detach_finalize_async (hostaddrs[i], async); else if (kind == GOMP_MAP_STRUCT) i += sizes[i]; } @@ -1053,9 +1053,9 @@ GOACC_enter_exit_data (int flags_m, size_t mapnum, unsigned char kind2 = kinds[i + pointer - 1] & 0xff; if (kind2 == GOMP_MAP_DETACH) - acc_detach (hostaddrs[i + pointer - 1]); + acc_detach_async (hostaddrs[i + pointer - 1], async); else if (kind2 == GOMP_MAP_FORCE_DETACH) - acc_detach_finalize (hostaddrs[i + pointer - 1]); + acc_detach_finalize_async (hostaddrs[i + pointer - 1], async); i += pointer - 1; } diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index b8ec963..b5995af 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -1408,13 +1408,9 @@ wait_for_queue_nonfull (struct goacc_asyncqueue *aq) { if (aq->queue_n == ASYNC_QUEUE_SIZE) { - pthread_mutex_lock (&aq->mutex); - /* Queue is full. Wait for it to not be full. */ while (aq->queue_n == ASYNC_QUEUE_SIZE) pthread_cond_wait (&aq->queue_cond_out, &aq->mutex); - - pthread_mutex_unlock (&aq->mutex); } } @@ -1424,10 +1420,10 @@ queue_push_launch (struct goacc_asyncqueue *aq, struct kernel_info *kernel, { assert (aq->agent == kernel->agent); - wait_for_queue_nonfull (aq); - pthread_mutex_lock (&aq->mutex); + wait_for_queue_nonfull (aq); + int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE); if (DEBUG_QUEUES) @@ -1453,10 +1449,10 @@ static void queue_push_callback (struct goacc_asyncqueue *aq, void (*fn)(void *), void *data) { - wait_for_queue_nonfull (aq); - pthread_mutex_lock (&aq->mutex); + wait_for_queue_nonfull (aq); + int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE); if (DEBUG_QUEUES) @@ -1484,10 +1480,10 @@ static void queue_push_asyncwait (struct goacc_asyncqueue *aq, struct placeholder *placeholderp) { - wait_for_queue_nonfull (aq); - pthread_mutex_lock (&aq->mutex); + wait_for_queue_nonfull (aq); + int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE); if (DEBUG_QUEUES) HSA_DEBUG ("queue_push_asyncwait %d:%d: at %i\n", aq->agent->device_id, @@ -1511,10 +1507,10 @@ queue_push_placeholder (struct goacc_asyncqueue *aq) { struct placeholder *placeholderp; - wait_for_queue_nonfull (aq); - pthread_mutex_lock (&aq->mutex); + wait_for_queue_nonfull (aq); + int queue_last = ((aq->queue_first + aq->queue_n) % ASYNC_QUEUE_SIZE); if (DEBUG_QUEUES) HSA_DEBUG ("queue_push_placeholder %d:%d: at %i\n", aq->agent->device_id, @@ -3683,19 +3679,22 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, - size_t n, struct goacc_asyncqueue *aq) + size_t n, bool ephemeral, + struct goacc_asyncqueue *aq) { struct agent_info *agent = get_agent_info (device); assert (agent == aq->agent); - /* The source data does not necessarily remain live until the deferred - copy happens. Taking a snapshot of the data here avoids reading - uninitialised data later, but means that (a) data is copied twice and - (b) modifications to the copied data between the "spawning" point of - the asynchronous kernel and when it is executed will not be seen. - But, that is probably correct. */ - void *src_copy = GOMP_PLUGIN_malloc (n); - memcpy (src_copy, src, n); - queue_push_copy (aq, dst, src_copy, n, true); + + if (ephemeral) + { + /* The source data is on the stack or otherwise may be deallocated + before the asynchronous copy takes place. Take a copy of the source + data. */ + void *src_copy = GOMP_PLUGIN_malloc (n); + memcpy (src_copy, src, n); + src = src_copy; + } + queue_push_copy (aq, dst, src, n, ephemeral); return true; } diff --git a/libgomp/target.c b/libgomp/target.c index 0656df1..683a42b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -194,22 +194,6 @@ gomp_device_copy (struct gomp_device_descr *devicep, } } -static inline void -goacc_device_copy_async (struct gomp_device_descr *devicep, - bool (*copy_func) (int, void *, const void *, size_t, - struct goacc_asyncqueue *), - const char *dst, void *dstaddr, - const char *src, const void *srcaddr, - size_t size, struct goacc_asyncqueue *aq) -{ - if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) - { - gomp_mutex_unlock (&devicep->lock); - gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", - src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); - } -} - /* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) host to device memory transfers. */ @@ -303,15 +287,17 @@ gomp_to_device_kind_p (int kind) } /* Copy host memory to an offload device. In asynchronous mode (if AQ is - non-NULL), H may point to a stack location. It is up to the underlying - plugin to ensure that this data is read immediately, rather than at some - later point when the stack frame will likely have been destroyed. */ + non-NULL), when the source data is stack or may otherwise be deallocated + before the asynchronous copy takes place, EPHEMERAL must be passed as + TRUE. The CBUF isn't used for non-ephemeral asynchronous copies, because + the host data might not be computed yet (by an earlier asynchronous compute + region). */ attribute_hidden void gomp_copy_host2dev (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, void *d, const void *h, size_t sz, - struct gomp_coalesce_buf *cbuf) + bool ephemeral, struct gomp_coalesce_buf *cbuf) { if (cbuf) { @@ -339,8 +325,15 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep, } } if (__builtin_expect (aq != NULL, 0)) - goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func, - "dev", d, "host", h, sz, aq); + { + if (!devicep->openacc.async.host2dev_func (devicep->target_id, d, h, sz, + ephemeral, aq)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of host object [%p..%p) to dev object [%p..%p) " + "failed", h, h + sz, d, d + sz); + } + } else gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz); } @@ -351,8 +344,15 @@ gomp_copy_dev2host (struct gomp_device_descr *devicep, void *h, const void *d, size_t sz) { if (__builtin_expect (aq != NULL, 0)) - goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func, - "host", h, "dev", d, sz, aq); + { + if (!devicep->openacc.async.dev2host_func (devicep->target_id, h, d, sz, + aq)) + { + gomp_mutex_unlock (&devicep->lock); + gomp_fatal ("Copying of dev object [%p..%p) to host object [%p..%p) " + "failed", d, d + sz, h, h + sz); + } + } else gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz); } @@ -579,7 +579,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, (void *) (oldn->tgt->tgt_start + oldn->tgt_offset + newn->host_start - oldn->host_start), (void *) newn->host_start, - newn->host_end - newn->host_start, cbuf); + newn->host_end - newn->host_start, false, cbuf); if (oldn->refcount != REFCOUNT_INFINITY) oldn->refcount++; @@ -607,8 +607,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, cur_node.tgt_offset = (uintptr_t) NULL; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, sizeof (void *), - cbuf); + (void *) &cur_node.tgt_offset, + sizeof (void *), true, cbuf); return; } /* Add bias to the pointer value. */ @@ -628,7 +628,8 @@ gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq, to initialize the pointer with. */ cur_node.tgt_offset -= bias; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), - (void *) &cur_node.tgt_offset, sizeof (void *), cbuf); + (void *) &cur_node.tgt_offset, sizeof (void *), true, + cbuf); } static void @@ -760,7 +761,7 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data); gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -815,7 +816,7 @@ gomp_detach_pointer (struct gomp_device_descr *devicep, (void *) target); gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); } else gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__, @@ -1147,8 +1148,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, for (i = first; i <= last; i++) { tgt->list[i].key = NULL; - if (gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) - & typemask)) + if (!aq + && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, + i) & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size - cur_node.host_end + (uintptr_t) hostaddrs[i], @@ -1209,8 +1211,9 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - gomp_coalesce_buf_add (&cbuf, tgt_size, - cur_node.host_end - cur_node.host_start); + if (!aq) + gomp_coalesce_buf_add (&cbuf, tgt_size, + cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; has_firstprivate = true; continue; @@ -1240,7 +1243,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - if (gomp_to_device_kind_p (kind & typemask)) + if (!aq && gomp_to_device_kind_p (kind & typemask)) gomp_coalesce_buf_add (&cbuf, tgt_size, cur_node.host_end - cur_node.host_start); tgt_size += cur_node.host_end - cur_node.host_start; @@ -1395,7 +1398,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, len = sizes[i]; gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + tgt_size), - (void *) hostaddrs[i], len, cbufp); + (void *) hostaddrs[i], len, false, cbufp); tgt_size += len; continue; case GOMP_MAP_FIRSTPRIVATE_INT: @@ -1448,12 +1451,11 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, if (cur_node.tgt_offset) cur_node.tgt_offset -= sizes[i]; gomp_copy_host2dev (devicep, aq, - (void *) (n->tgt->tgt_start - + n->tgt_offset + (void *) (n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start), (void *) &cur_node.tgt_offset, - sizeof (void *), cbufp); + sizeof (void *), true, cbufp); cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start; continue; @@ -1612,7 +1614,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, false, + cbufp); break; case GOMP_MAP_POINTER: gomp_map_pointer (tgt, aq, @@ -1624,7 +1627,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - k->host_end - k->host_start, cbufp); + k->host_end - k->host_start, false, + cbufp); for (j = i + 1; j < mapnum; j++) if (!GOMP_MAP_POINTER_P (get_kind (short_mapkind, kinds, @@ -1676,7 +1680,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, - sizeof (void *), cbufp); + sizeof (void *), false, cbufp); break; default: gomp_mutex_unlock (&devicep->lock); @@ -1692,7 +1696,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, /* We intentionally do not use coalescing here, as it's not data allocated by the current call to this function. */ gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, - &tgt_addr, sizeof (void *), NULL); + &tgt_addr, sizeof (void *), true, NULL); } array++; } @@ -1779,7 +1783,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) tgt->tgt_start + k->tgt_offset, (void *) k->host_start, - da->data_row_size, cbufp); + da->data_row_size, false, cbufp); array++; } target_data_rows[row_start + j] = (void *) target_row_addr; @@ -1793,7 +1797,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, void *ptrblock = gomp_dynamic_array_create_ptrblock (da, target_ptrblock, target_data_rows + row_start); gomp_copy_host2dev (devicep, aq, target_ptrblock, ptrblock, - da->ptrblock_size, cbufp); + da->ptrblock_size, true, cbufp); free (ptrblock); } @@ -1817,7 +1821,7 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + i * sizeof (void *)), (void *) &cur_node.tgt_offset, sizeof (void *), - cbufp); + true, cbufp); } } @@ -1829,7 +1833,8 @@ gomp_map_vars_internal (struct gomp_device_descr *devicep, (void *) (tgt->tgt_start + cbuf.chunks[c].start), (char *) cbuf.buf + (cbuf.chunks[c].start - cbuf.chunks[0].start), - cbuf.chunks[c].end - cbuf.chunks[c].start, NULL); + cbuf.chunks[c].end - cbuf.chunks[c].start, true, + NULL); free (cbuf.buf); cbuf.buf = NULL; cbufp = NULL; @@ -2099,7 +2104,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs, if (GOMP_MAP_COPY_TO_P (kind & typemask)) gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size, - NULL); + false, NULL); if (GOMP_MAP_COPY_FROM_P (kind & typemask)) gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c index 37cde4e..2ccb5f5 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-10.c @@ -1,6 +1,8 @@ #include <stdlib.h> -/* Test asyncronous attach and detach operation. */ +#define ITERATIONS 1023 + +/* Test asynchronous attach and detach operation. */ typedef struct { int *a; @@ -25,13 +27,13 @@ main (int argc, char* argv[]) #pragma acc enter data copyin(m) - for (int i = 0; i < 99; i++) + for (int i = 0; i < ITERATIONS; i++) { int j; -#pragma acc parallel loop copy(m.a[0:N]) async(i % 2) +#pragma acc parallel loop copy(m.a[0:N]) async(0) for (j = 0; j < N; j++) m.a[j]++; -#pragma acc parallel loop copy(m.b[0:N]) async((i + 1) % 2) +#pragma acc parallel loop copy(m.b[0:N]) async(1) for (j = 0; j < N; j++) m.b[j]++; } @@ -40,10 +42,10 @@ main (int argc, char* argv[]) for (i = 0; i < N; i++) { - if (m.a[i] != 99) - abort (); - if (m.b[i] != 99) - abort (); + if (m.a[i] != ITERATIONS) + abort (); + if (m.b[i] != ITERATIONS) + abort (); } free (m.a); diff --git a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 index fa76f65..94b80d0 100644 --- a/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 +++ b/libgomp/testsuite/libgomp.oacc-fortran/lib-16-2.f90 @@ -27,6 +27,9 @@ program main if (acc_is_present (h) .neqv. .TRUE.) call abort + ! We must wait for the update to be done. + call acc_wait (async) + h(:) = 0 call acc_copyout_async (h, sizeof (h), async) @@ -45,6 +48,8 @@ program main if (acc_is_present (h) .neqv. .TRUE.) call abort + call acc_wait (async) + do i = 1, N if (h(i) /= i + i) call abort end do |