From 8b9e559fe7ca5715c74115322af99dbf9137a399 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Sat, 29 Jul 2023 13:25:03 +0200 Subject: libgomp: cuda.h and omp_target_memcpy_rect cleanup Fixes for commit r14-2792-g25072a477a56a727b369bf9b20f4d18198ff5894 "OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect", namely: In that commit, the code was changed to handle shared-memory devices; however, as pointed out, omp_target_memcpy_check already set the pointer to NULL in that case. Hence, this commit reverts to the prior version. In cuda.h, it adds cuMemcpyPeer{,Async} for symmetry for cuMemcpy3DPeer (all currently unused) and in three structs, fixes reserved-member names and remove a bogus 'const' in three structs. And it changes a DLSYM to DLSYM_OPT as not all plugins support the new functions, yet. include/ChangeLog: * cuda/cuda.h (CUDA_MEMCPY2D, CUDA_MEMCPY3D, CUDA_MEMCPY3D_PEER): Remove bogus 'const' from 'const void *dst' and fix reserved-name name in those structs. (cuMemcpyPeer, cuMemcpyPeerAsync): Add. libgomp/ChangeLog: * target.c (omp_target_memcpy_rect_worker): Undo dim=1 change for GOMP_OFFLOAD_CAP_SHARED_MEM. (omp_target_memcpy_rect_copy): Likewise for lock condition. (gomp_load_plugin_for_device): Use DLSYM_OPT not DLSYM for memcpy3d/memcpy2d. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): Use memset 0 to nullify reserved and unused src/dst fields for that mem type; remove '{src,dst}LOD = 0'. --- include/cuda/cuda.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/cuda/cuda.h b/include/cuda/cuda.h index 09c3c2b..94fc64a 100644 --- a/include/cuda/cuda.h +++ b/include/cuda/cuda.h @@ -147,7 +147,7 @@ typedef struct { size_t dstXInBytes, dstY; CUmemorytype dstMemoryType; - const void *dstHost; + void *dstHost; CUdeviceptr dstDevice; CUarray dstArray; size_t dstPitch; @@ -162,16 +162,16 @@ typedef struct { const void *srcHost; CUdeviceptr srcDevice; CUarray srcArray; - void *dummy; + void *reserved0; size_t srcPitch, srcHeight; size_t dstXInBytes, dstY, dstZ; size_t dstLOD; CUmemorytype dstMemoryType; - const void *dstHost; + void *dstHost; CUdeviceptr dstDevice; CUarray dstArray; - void *dummy2; + void *reserved1; size_t dstPitch, dstHeight; size_t WidthInBytes, Height, Depth; @@ -190,7 +190,7 @@ typedef struct { size_t dstXInBytes, dstY, dstZ; size_t dstLOD; CUmemorytype dstMemoryType; - const void *dstHost; + void *dstHost; CUdeviceptr dstDevice; CUarray dstArray; CUcontext dstContext; @@ -246,6 +246,8 @@ CUresult cuMemAlloc (CUdeviceptr *, size_t); CUresult cuMemAllocHost (void **, size_t); CUresult cuMemHostAlloc (void **, size_t, unsigned int); CUresult cuMemcpy (CUdeviceptr, CUdeviceptr, size_t); +CUresult cuMemcpyPeer (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t); +CUresult cuMemcpyPeerAsync (CUdeviceptr, CUcontext, CUdeviceptr, CUcontext, size_t, CUstream); #define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2 CUresult cuMemcpyDtoDAsync (CUdeviceptr, CUdeviceptr, size_t, CUstream); #define cuMemcpyDtoH cuMemcpyDtoH_v2 -- cgit v1.1