diff options
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- | libgomp/plugin/plugin-gcn.c | 17 | ||||
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 43 |
2 files changed, 55 insertions, 5 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index b39a94b..f823b27 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -5081,7 +5081,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, queue_push_callback (aq, fn, data); } -/* Queue up an asynchronous data copy from host to DEVICE. */ +/* Queue up an asynchronous data copy from host to DEVICE. + (Also handles dev2host and dev2dev.) */ bool GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src, @@ -5099,10 +5100,16 @@ bool GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { - struct agent_info *agent = get_agent_info (device); - assert (agent == aq->agent); - queue_push_copy (aq, dst, src, n); - return true; + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); +} + +/* Queue up an asynchronous data copy from DEVICE to DEVICE. */ + +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq); } union goacc_property_value diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index a6c8198..712c8b7 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2060,6 +2060,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2118,6 +2146,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2329,6 +2360,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { |