diff options
Diffstat (limited to 'libgomp/plugin/plugin-nvptx.c')
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 70 |
1 files changed, 68 insertions, 2 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 822c6a4..0ba445e 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -2019,6 +2019,34 @@ GOMP_OFFLOAD_openacc_async_queue_callback (struct goacc_asyncqueue *aq, } static bool +cuda_memcpy_dev_sanity_check (const void *d1, const void *d2, size_t s) +{ + CUdeviceptr pb1, pb2; + size_t ps1, ps2; + if (!s) + return true; + if (!d1 || !d2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + CUDA_CALL (cuMemGetAddressRange, &pb1, &ps1, (CUdeviceptr) d1); + CUDA_CALL (cuMemGetAddressRange, &pb2, &ps2, (CUdeviceptr) d2); + if (!pb1 || !pb2) + { + GOMP_PLUGIN_error ("invalid device address"); + return false; + } + if ((void *)(d1 + s) > (void *)(pb1 + ps1) + || (void *)(d2 + s) > (void *)(pb2 + ps2)) + { + GOMP_PLUGIN_error ("invalid size"); + return false; + } + return true; +} + +static bool cuda_memcpy_sanity_check (const void *h, const void *d, size_t s) { CUdeviceptr pb; @@ -2077,6 +2105,9 @@ GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) bool GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) { + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, NULL); return true; } @@ -2267,6 +2298,15 @@ GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, } bool +GOMP_OFFLOAD_memset (int ord, void *ptr, int val, size_t count) +{ + if (!nvptx_attach_host_thread_to_device (ord)) + return false; + CUDA_CALL (cuMemsetD8, (CUdeviceptr) ptr, (unsigned char) val, count); + return true; +} + +bool GOMP_OFFLOAD_openacc_async_host2dev (int ord, void *dst, const void *src, size_t n, struct goacc_asyncqueue *aq) { @@ -2288,6 +2328,18 @@ GOMP_OFFLOAD_openacc_async_dev2host (int ord, void *dst, const void *src, return true; } +bool +GOMP_OFFLOAD_openacc_async_dev2dev (int ord, void *dst, const void *src, + size_t n, struct goacc_asyncqueue *aq) +{ + if (!nvptx_attach_host_thread_to_device (ord) + || !cuda_memcpy_dev_sanity_check (dst, src, n)) + return false; + CUDA_CALL (cuMemcpyDtoDAsync, (CUdeviceptr) dst, (CUdeviceptr) src, n, + aq->cuda_stream); + return true; +} + union goacc_property_value GOMP_OFFLOAD_openacc_get_property (int n, enum goacc_property prop) { @@ -2483,12 +2535,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord, break; } - obj->device_data = ptx_devices[ord]; + struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord]; if (targetsync) { CUstream stream = NULL; - CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + CUdevice cur_ctx_dev; + CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev); + if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT) + GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res)); + if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev) + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + else + { + CUcontext old_ctx; + assert (ptx_dev->ctx); + CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx); + CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT); + if (res != CUDA_ERROR_INVALID_CONTEXT) + CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx); + } obj->stream = stream; } } |