aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author Tobias Burnus <tobias@codesourcery.com> 2023-07-29 13:25:03 +0200
committer Tobias Burnus <tobias@codesourcery.com> 2023-07-29 13:25:03 +0200
commit 8b9e559fe7ca5715c74115322af99dbf9137a399 (patch)
tree 1e9c2a5af7f731371133e14689a94c3af9812513 /include
parent 5ffa9d0a5e22f6f763b7f04877a940689e7abcba (diff)
download gcc-8b9e559fe7ca5715c74115322af99dbf9137a399.zip
gcc-8b9e559fe7ca5715c74115322af99dbf9137a399.tar.gz
gcc-8b9e559fe7ca5715c74115322af99dbf9137a399.tar.bz2
libgomp: cuda.h and omp_target_memcpy_rect cleanup
Fixes for commit r14-2792-g25072a477a56a727b369bf9b20f4d18198ff5894 "OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect", namely: In that commit, the code was changed to handle shared-memory devices; however, as pointed out, omp_target_memcpy_check already sets the pointer to NULL in that case. Hence, this commit reverts to the prior version. In cuda.h, it adds cuMemcpyPeer{,Async} for symmetry with cuMemcpy3DPeer (all currently unused) and, in three structs, fixes reserved-member names and removes a bogus 'const'. It also changes a DLSYM to DLSYM_OPT, as not all plugins support the new functions yet. include/ChangeLog: * cuda/cuda.h (CUDA_MEMCPY2D, CUDA_MEMCPY3D, CUDA_MEMCPY3D_PEER): Remove bogus 'const' from 'const void *dstHost' and fix reserved-member names in those structs. (cuMemcpyPeer, cuMemcpyPeerAsync): Add. libgomp/ChangeLog: * target.c (omp_target_memcpy_rect_worker): Undo dim=1 change for GOMP_OFFLOAD_CAP_SHARED_MEM. (omp_target_memcpy_rect_copy): Likewise for lock condition. (gomp_load_plugin_for_device): Use DLSYM_OPT not DLSYM for memcpy3d/memcpy2d. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): Use memset 0 to nullify reserved and unused src/dst fields for that mem type; remove '{src,dst}LOD = 0'.
Diffstat (limited to 'include')
-rw-r--r-- include/cuda/cuda.h 12
1 files changed, 7 insertions, 5 deletions
diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h
index 09c3c2b..94fc64a 100644
--- a/include/cuda/cuda.h
+++ b/include/cuda/cuda.h
@@ -147,7 +147,7 @@ typedef struct {
size_t dstXInBytes, dstY;
CUmemorytype dstMemoryType;
- const void *dstHost;
+ void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
size_t dstPitch;
@@ -162,16 +162,16 @@ typedef struct {
const void *srcHost;
CUdeviceptr srcDevice;
CUarray srcArray;
- void *dummy;
+ void *reserved0;
size_t srcPitch, srcHeight;
size_t dstXInBytes, dstY, dstZ;
size_t dstLOD;
CUmemorytype dstMemoryType;
- const void *dstHost;
+ void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
- void *dummy2;
+ void *reserved1;
size_t dstPitch, dstHeight;
size_t WidthInBytes, Height, Depth;
@@ -190,7 +190,7 @@ typedef struct {
size_t dstXInBytes, dstY, dstZ;
size_t dstLOD;
CUmemorytype dstMemoryType;
- const void *dstHost;
+ void *dstHost;
CUdeviceptr dstDevice;
CUarray dstArray;
CUcontext dstContext;
@@ -246,6 +246,8 @@ CUresult cuMemAlloc (CUdeviceptr *, size_t);
CUresult cuMemAllocHost (void **, size_t);
CUresult cuMemHostAlloc (void **, size_t, unsigned int);
CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t);
+CUresult cuMemcpyPeer (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t);
+CUresult cuMemcpyPeerAsync (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t, CUstream);
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream);
#define cuMemcpyDtoH cuMemcpyDtoH_v2