From 8b9e559fe7ca5715c74115322af99dbf9137a399 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Sat, 29 Jul 2023 13:25:03 +0200 Subject: libgomp: cuda.h and omp_target_memcpy_rect cleanup Fixes for commit r14-2792-g25072a477a56a727b369bf9b20f4d18198ff5894 "OpenMP: Call cuMemcpy2D/cuMemcpy3D for nvptx for omp_target_memcpy_rect", namely: In that commit, the code was changed to handle shared-memory devices; however, as pointed out, omp_target_memcpy_check already sets the pointer to NULL in that case. Hence, this commit reverts to the prior version. In cuda.h, it adds cuMemcpyPeer{,Async} for symmetry with cuMemcpy3DPeer (all currently unused) and, in three structs, fixes reserved-member names and removes a bogus 'const'. Finally, it changes DLSYM to DLSYM_OPT for memcpy2d/memcpy3d, as not all plugins support the new functions yet. include/ChangeLog: * cuda/cuda.h (CUDA_MEMCPY2D, CUDA_MEMCPY3D, CUDA_MEMCPY3D_PEER): Remove bogus 'const' from 'const void *dst' and fix reserved-member names in those structs. (cuMemcpyPeer, cuMemcpyPeerAsync): Add. libgomp/ChangeLog: * target.c (omp_target_memcpy_rect_worker): Undo dim=1 change for GOMP_OFFLOAD_CAP_SHARED_MEM. (omp_target_memcpy_rect_copy): Likewise for lock condition. (gomp_load_plugin_for_device): Use DLSYM_OPT not DLSYM for memcpy3d/memcpy2d. * plugin/plugin-nvptx.c (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): Use memset 0 to nullify reserved and unused src/dst fields for that mem type; remove '{src,dst}LOD = 0'. 
--- libgomp/target.c | 52 +++++++++++++++++----------------------------------- 1 file changed, 17 insertions(+), 35 deletions(-) (limited to 'libgomp/target.c') diff --git a/libgomp/target.c b/libgomp/target.c index 5cf2e8d..cd4cc1b 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -4540,33 +4540,22 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) return EINVAL; - if (src_devicep != NULL && src_devicep == dst_devicep) - ret = src_devicep->dev2dev_func (src_devicep->target_id, - (char *) dst + dst_off, - (const char *) src + src_off, - length); - else if (src_devicep != NULL - && (dst_devicep == NULL - || (dst_devicep->capabilities - & GOMP_OFFLOAD_CAP_SHARED_MEM))) - ret = src_devicep->dev2host_func (src_devicep->target_id, + if (dst_devicep == NULL && src_devicep == NULL) + { + memcpy ((char *) dst + dst_off, (const char *) src + src_off, + length); + ret = 1; + } + else if (src_devicep == NULL) + ret = dst_devicep->host2dev_func (dst_devicep->target_id, (char *) dst + dst_off, (const char *) src + src_off, length); - else if (dst_devicep != NULL - && (src_devicep == NULL - || (src_devicep->capabilities - & GOMP_OFFLOAD_CAP_SHARED_MEM))) - ret = dst_devicep->host2dev_func (dst_devicep->target_id, + else if (dst_devicep == NULL) + ret = src_devicep->dev2host_func (src_devicep->target_id, (char *) dst + dst_off, (const char *) src + src_off, length); - else if (dst_devicep == NULL && src_devicep == NULL) - { - memcpy ((char *) dst + dst_off, (const char *) src + src_off, - length); - ret = 1; - } else if (src_devicep == dst_devicep) ret = src_devicep->dev2dev_func (src_devicep->target_id, (char *) dst + dst_off, @@ -4584,7 +4573,8 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, else if (*tmp_size < length) { *tmp_size = length; - *tmp = realloc (*tmp, length); 
+ free (*tmp); + *tmp = malloc (length); if (*tmp == NULL) return ENOMEM; } @@ -4599,7 +4589,7 @@ omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, return ret ? 0 : EINVAL; } - /* host->device, device->host and same-device device->device. */ + /* host->device, device->host and intra device. */ if (num_dims == 2 && ((src_devicep && src_devicep == dst_devicep @@ -4711,16 +4701,8 @@ omp_target_memcpy_rect_copy (void *dst, const void *src, bool lock_src; bool lock_dst; - lock_src = (src_devicep - && (!dst_devicep - || src_devicep == dst_devicep - || !(src_devicep->capabilities - & GOMP_OFFLOAD_CAP_SHARED_MEM))); - lock_dst = (dst_devicep - && (!lock_src - || (src_devicep != dst_devicep - && !(dst_devicep->capabilities - & GOMP_OFFLOAD_CAP_SHARED_MEM)))); + lock_src = src_devicep != NULL; + lock_dst = dst_devicep != NULL && src_devicep != dst_devicep; if (lock_src) gomp_mutex_lock (&src_devicep->lock); if (lock_dst) @@ -5076,8 +5058,8 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (free); DLSYM (dev2host); DLSYM (host2dev); - DLSYM (memcpy2d); - DLSYM (memcpy3d); + DLSYM_OPT (memcpy2d, memcpy2d); + DLSYM_OPT (memcpy3d, memcpy3d); device->capabilities = device->get_caps_func (); if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) { -- cgit v1.1