aboutsummaryrefslogtreecommitdiff
path: root/libgomp/libgomp.h
diff options
context:
space:
mode:
authorTobias Burnus <tobias@codesourcery.com>2023-07-26 16:22:35 +0200
committerTobias Burnus <tobias@codesourcery.com>2023-07-26 16:22:35 +0200
commit25072a477a56a727b369bf9b20f4d18198ff5894 (patch)
tree8dc40c0f128509b0e5c78ff32d5102c321bbaa4d /libgomp/libgomp.h
parentc194a413369e9c9f92f1c9334556b359c7417742 (diff)
downloadgcc-25072a477a56a727b369bf9b20f4d18198ff5894.zip
gcc-25072a477a56a727b369bf9b20f4d18198ff5894.tar.gz
gcc-25072a477a56a727b369bf9b20f4d18198ff5894.tar.bz2
OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect
When copying a 2D or 3D rectangular memory block, the performance is better when using CUDA's cuMemcpy2D/cuMemcpy3D instead of copying the data one by one. That's what this commit does. Additionally, it permits device-to-device copies, if necessary using a temporary variable on the host. include/ChangeLog: * cuda/cuda.h (CUlimit): Add CUDA_ERROR_NOT_INITIALIZED, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_INVALID_HANDLE. (CUarray, CUmemorytype, CUDA_MEMCPY2D, CUDA_MEMCPY3D, CUDA_MEMCPY3D_PEER): New typedefs. (cuMemcpy2D, cuMemcpy2DAsync, cuMemcpy2DUnaligned, cuMemcpy3D, cuMemcpy3DAsync, cuMemcpy3DPeer, cuMemcpy3DPeerAsync): New prototypes. libgomp/ChangeLog: * libgomp-plugin.h (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): New prototypes. * libgomp.h (struct gomp_device_descr): Add memcpy2d_func and memcpy3d_func. * libgomp.texi (nvptx): Document when cuMemcpy2D/cuMemcpy3D is used. * oacc-host.c (memcpy2d_func, .memcpy3d_func): Init with NULL. * plugin/cuda-lib.def (cuMemcpy2D, cuMemcpy2DUnaligned, cuMemcpy3D): Invoke via CUDA_ONE_CALL. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): New. * target.c (omp_target_memcpy_rect_worker): (omp_target_memcpy_rect_check, omp_target_memcpy_rect_copy): Permit all device-to-device copies; invoke new plugins for 2D and 3D copying when available. (gomp_load_plugin_for_device): DLSYM the new plugin functions. * testsuite/libgomp.c/target-12.c: Fix dimension bug. * testsuite/libgomp.fortran/target-12.f90: Likewise. * testsuite/libgomp.fortran/target-memcpy-rect-1.f90: New test.
Diffstat (limited to 'libgomp/libgomp.h')
-rw-r--r--libgomp/libgomp.h2
1 files changed, 2 insertions, 0 deletions
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 4d2bfab..68f2065 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -1388,6 +1388,8 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_free) *free_func;
__typeof (GOMP_OFFLOAD_dev2host) *dev2host_func;
__typeof (GOMP_OFFLOAD_host2dev) *host2dev_func;
+ __typeof (GOMP_OFFLOAD_memcpy2d) *memcpy2d_func;
+ __typeof (GOMP_OFFLOAD_memcpy3d) *memcpy3d_func;
__typeof (GOMP_OFFLOAD_dev2dev) *dev2dev_func;
__typeof (GOMP_OFFLOAD_can_run) *can_run_func;
__typeof (GOMP_OFFLOAD_run) *run_func;