diff options
author | Tom de Vries <tdevries@suse.de> | 2019-01-23 08:16:56 +0000 |
---|---|---|
committer | Tom de Vries <vries@gcc.gnu.org> | 2019-01-23 08:16:56 +0000 |
commit | 4a75460b0099618b2d79ffda615a9516dcd5c224 (patch) | |
tree | 2596f989d134272b77a51936fc8e04458455975c /libgomp/plugin | |
parent | 4fef8e4d8c8901db0fa21c4d49b7a851bff4ac9a (diff) | |
download | gcc-4a75460b0099618b2d79ffda615a9516dcd5c224.zip gcc-4a75460b0099618b2d79ffda615a9516dcd5c224.tar.gz gcc-4a75460b0099618b2d79ffda615a9516dcd5c224.tar.bz2 |
[nvptx, libgomp] Fix cuMemAlloc with size zero
Consider test-case:
...
int
main (void)
{
#pragma acc parallel async
;
#pragma acc parallel async
;
#pragma acc wait
return 0;
}
...
This fails with:
...
libgomp: cuMemAlloc error: invalid argument
Segmentation fault (core dumped)
...
The cuMemAlloc error is due to the fact that we're try to allocate 0 bytes.
Fix this by preventing calling map_push with size zero argument in nvptx_exec.
This also has the consequence that for the abort-1.c test-case, we end up
calling cuMemFree during map_fini for the struct cuda_map allocated in
map_init, which fails because an abort happened. Fix this by calling
cuMemFree with CUDA_CALL_NOCHECK in cuda_map_destroy.
2019-01-23 Tom de Vries <tdevries@suse.de>
PR target/PR88946
* plugin/plugin-nvptx.c (cuda_map_destroy): Use CUDA_CALL_NOCHECK for
cuMemFree.
(nvptx_exec): Don't call map_push if mapnum == 0.
* testsuite/libgomp.oacc-c-c++-common/pr88946.c: New test.
From-SVN: r268178
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- | libgomp/plugin/plugin-nvptx.c | 48 |
1 files changed, 28 insertions, 20 deletions
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 4a67191..ff90b67 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -260,7 +260,7 @@ cuda_map_destroy (struct cuda_map *map) atexit handler (PR83795). */ ; else - CUDA_CALL_ASSERT (cuMemFree, map->d); + CUDA_CALL_NOCHECK (cuMemFree, map->d); free (map); } @@ -1164,7 +1164,7 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, struct ptx_stream *dev_str; void *kargs[1]; void *hp; - CUdeviceptr dp; + CUdeviceptr dp = 0; struct nvptx_thread *nvthd = nvptx_thread (); int warp_size = nvthd->ptx_dev->warp_size; const char *maybe_abort_msg = "(perhaps abort was called)"; @@ -1361,23 +1361,27 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, dims[GOMP_DIM_VECTOR]); } - /* This reserves a chunk of a pre-allocated page of memory mapped on both - the host and the device. HP is a host pointer to the new chunk, and DP is - the corresponding device pointer. */ - pthread_mutex_lock (&ptx_event_lock); - dp = map_push (dev_str, mapnum * sizeof (void *)); - pthread_mutex_unlock (&ptx_event_lock); - - GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); - - /* Copy the array of arguments to the mapped page. */ - hp = alloca(sizeof(void *) * mapnum); - for (i = 0; i < mapnum; i++) - ((void **) hp)[i] = devaddrs[i]; + if (mapnum > 0) + { + /* This reserves a chunk of a pre-allocated page of memory mapped on both + the host and the device. HP is a host pointer to the new chunk, and DP is + the corresponding device pointer. */ + pthread_mutex_lock (&ptx_event_lock); + dp = map_push (dev_str, mapnum * sizeof (void *)); + pthread_mutex_unlock (&ptx_event_lock); + + GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); + + /* Copy the array of arguments to the mapped page. */ + hp = alloca(sizeof(void *) * mapnum); + for (i = 0; i < mapnum; i++) + ((void **) hp)[i] = devaddrs[i]; + + /* Copy the (device) pointers to arguments to the device */ + CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp, + mapnum * sizeof (void *)); + } - /* Copy the (device) pointers to arguments to the device */ - CUDA_CALL_ASSERT (cuMemcpyHtoD, dp, hp, - mapnum * sizeof (void *)); GOMP_PLUGIN_debug (0, " %s: kernel %s: launch" " gangs=%u, workers=%u, vectors=%u\n", __FUNCTION__, targ_fn->launch->fn, dims[GOMP_DIM_GANG], @@ -1422,7 +1426,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, CUDA_CALL_ASSERT (cuEventRecord, *e, dev_str->stream); - event_add (PTX_EVT_KNL, e, (void *)dev_str, 0); + if (mapnum > 0) + event_add (PTX_EVT_KNL, e, (void *)dev_str, 0); } #else r = CUDA_CALL_NOCHECK (cuCtxSynchronize, ); @@ -1439,7 +1444,10 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, #ifndef DISABLE_ASYNC if (async < acc_async_noval) #endif - map_pop (dev_str); + { + if (mapnum > 0) + map_pop (dev_str); + } } void * openacc_get_current_cuda_context (void); |