aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- libgomp/plugin/plugin-gcn.c 17
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 43
2 files changed, 55 insertions, 5 deletions
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index b39a94b..f823b27 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -5081,7 +5081,8 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
queue_push_callback (aq, fn, data);
}
-/* Queue up an asynchronous data copy from host to DEVICE. */
+/* Queue up an asynchronous data copy from host to DEVICE.
+ (Also handles dev2host and dev2dev.) */
bool
GOMP_OFFLOAD_openacc_async_host2dev (int device, void *dst, const void *src,
@@ -5099,10 +5100,16 @@ bool
GOMP_OFFLOAD_openacc_async_dev2host (int device, void *dst, const void *src,
size_t n, struct goacc_asyncqueue *aq)
{
- struct agent_info *agent = get_agent_info (device);
- assert (agent == aq->agent);
- queue_push_copy (aq, dst, src, n);
- return true;
+ /* Per its own comment, the host2dev entry point also handles dev2host
+ copies, so forward to it instead of repeating the queueing code here. */
+ return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
+}
+
+/* Queue up an asynchronous data copy from DEVICE to DEVICE.
+ Forwards to GOMP_OFFLOAD_openacc_async_host2dev, whose comment says it
+ also handles dev2dev; DEVICE, DST, SRC, N and AQ are passed through
+ unchanged and that function's result is returned as-is. */
+
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int device, void *dst, const void *src,
+ size_t n, struct goacc_asyncqueue *aq)
+{
+ return GOMP_OFFLOAD_openacc_async_host2dev (device, dst, src, n, aq);
}
union goacc_property_value
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index a6c8198..712c8b7 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -2060,6 +2060,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq,
}
+/* Sanity-check a device-to-device copy of S bytes between the device
+   addresses D1 and D2.  A zero-length copy is always accepted.  Otherwise
+   both addresses must be non-null, must resolve to an allocation via
+   cuMemGetAddressRange, and S bytes starting at each address must fit
+   inside that allocation.  Reports the problem with GOMP_PLUGIN_error and
+   returns false on failure, true if the copy looks valid.
+   NOTE(review): CUDA_CALL is defined outside this hunk; it presumably
+   returns false itself when the driver call fails -- confirm.  */
static bool
+cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s)
+{
+  CUdeviceptr pb1, pb2;
+  size_t ps1, ps2;
+  size_t off1, off2;
+  if (!s)
+    return true;
+  if (!d1 || !d2)
+    {
+      GOMP_PLUGIN_error ("invalid device address");
+      return false;
+    }
+  CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1);
+  CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2);
+  if (!pb1 || !pb2)
+    {
+      GOMP_PLUGIN_error ("invalid device address");
+      return false;
+    }
+  /* Compare S against the bytes remaining after each address instead of
+     computing "address + S": the sum can wrap around for a huge S and
+     then compare as in-range, and it needs arithmetic on 'void *'.  The
+     "off > ps" test also rejects an address below its reported base.  */
+  off1 = (CUdeviceptr) d1 - pb1;
+  off2 = (CUdeviceptr) d2 - pb2;
+  if (off1 > ps1 || s > ps1 - off1
+      || off2 > ps2 || s > ps2 - off2)
+    {
+      GOMP_PLUGIN_error ("invalid size");
+      return false;
+    }
+  return true;
+}
+
+static bool
cuda_memcpy_sanity_check (const void *h, const void *d, size_t s)
{
CUdeviceptr pb;
@@ -2118,6 +2146,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n)
+/* Copy N bytes from device address SRC to device address DST for device
+ number ORD. Returns false if the preliminary checks fail and true
+ once the copy has been issued. */
bool
GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n)
{
+ /* Refuse the copy if nvptx_attach_host_thread_to_device fails for ORD
+ or if cuda_memcpy_dev_sanity_check rejects DST, SRC and N. */
+ if (!nvptx_attach_host_thread_to_device (ord)
+ || !cuda_memcpy_dev_sanity_check (dst, src, n))
+ return false;
+ /* NOTE(review): this uses the Async driver call with a NULL stream and
+ nothing in this function waits for it to finish -- confirm intended. */
CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL);
return true;
}
@@ -2329,6 +2360,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src,
return true;
}
+/* Enqueue a copy of N bytes from device address SRC to device address DST
+   on the CUDA stream owned by async queue AQ, for device number ORD.
+   Returns false if attaching the host thread to the device fails or if
+   the addresses/size fail the device-side sanity check; returns true once
+   the copy has been handed to cuMemcpyDtoDAsync.  */
+bool
+GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src,
+                                    size_t n, struct goacc_asyncqueue *aq)
+{
+  /* Attach first; the sanity check is only run once that succeeded.  */
+  if (!nvptx_attach_host_thread_to_device (ord))
+    return false;
+  if (!cuda_memcpy_dev_sanity_check (dst, src, n))
+    return false;
+
+  CUdeviceptr to = (CUdeviceptr) dst;
+  CUdeviceptr from = (CUdeviceptr) src;
+  CUDA_CALL (cuMemcpyDtoDAsync, to, from, n, aq->cuda_stream);
+  return true;
+}
+
union goacc_property_value
GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop)
{