From 199867d07be65cb0227a318ebf42b8376ca09313 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge
Date: Mon, 27 Feb 2023 12:02:02 +0100
Subject: Simplify OpenACC 'no_create' clause implementation

For 'OFFSET_INLINED', 'gomp_map_val' does the right thing, and we may then
simplify the device plugins accordingly.

This is a follow-up to
Subversion r279551 (Git commit a6163563f2ce502bd4ef444bd5de33570bb8eeb1)
"Add OpenACC 2.6's no_create",
Subversion r279622 (Git commit 5bcd470bf0749e1f56d05dd43aa9584ff2e3a090)
"Use gomp_map_val for OpenACC host-to-device address translation".

	libgomp/
	* target.c (gomp_map_vars_internal): Use 'OFFSET_INLINED' for
	'GOMP_MAP_IF_PRESENT'.
	* plugin/plugin-gcn.c (gcn_exec, GOMP_OFFLOAD_openacc_exec)
	(GOMP_OFFLOAD_openacc_async_exec): Adjust.
	* plugin/plugin-nvptx.c (nvptx_exec, GOMP_OFFLOAD_openacc_exec)
	(GOMP_OFFLOAD_openacc_async_exec): Likewise.
	* testsuite/libgomp.oacc-c-c++-common/no_create-1.c: Add 'async'
	testing.
	* testsuite/libgomp.oacc-c-c++-common/no_create-2.c: Likewise.
--- libgomp/plugin/plugin-nvptx.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'libgomp/plugin/plugin-nvptx.c') diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 1166807..13e3115 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -742,8 +742,7 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, } static void -nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, - unsigned *dims, void *targ_mem_desc, +nvptx_exec (void (*fn), size_t mapnum, unsigned *dims, void *targ_mem_desc, CUdeviceptr dp, CUstream stream) { struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; @@ -1530,7 +1529,8 @@ GOMP_OFFLOAD_free (int ord, void *ptr) void GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc) { GOMP_PLUGIN_debug (0, " %s: prepare mappings\n", __FUNCTION__); @@ -1549,7 +1549,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, size_t s = mapnum * sizeof (void *); hp = alloca (s); for (int i = 0; i < mapnum; i++) - hp[i] = (devaddrs[i] ? 
devaddrs[i] : hostaddrs[i]); + hp[i] = devaddrs[i]; CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); if (profiling_p) goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); @@ -1591,8 +1591,7 @@ GOMP_OFFLOAD_openacc_exec (void (*fn) (void *), size_t mapnum, } } - nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, - dp, NULL); + nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, NULL); CUresult r = CUDA_CALL_NOCHECK (cuStreamSynchronize, NULL); const char *maybe_abort_msg = "(perhaps abort was called)"; @@ -1617,7 +1616,8 @@ cuda_free_argmem (void *ptr) void GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum, - void **hostaddrs, void **devaddrs, + void **hostaddrs __attribute__((unused)), + void **devaddrs, unsigned *dims, void *targ_mem_desc, struct goacc_asyncqueue *aq) { @@ -1639,7 +1639,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum, block = (void **) GOMP_PLUGIN_malloc (2 * sizeof (void *) + s); hp = block + 2; for (int i = 0; i < mapnum; i++) - hp[i] = (devaddrs[i] ? devaddrs[i] : hostaddrs[i]); + hp[i] = devaddrs[i]; CUDA_CALL_ASSERT (cuMemAlloc, &dp, s); if (profiling_p) goacc_profiling_acc_ev_alloc (thr, (void *) dp, s); @@ -1688,8 +1688,7 @@ GOMP_OFFLOAD_openacc_async_exec (void (*fn) (void *), size_t mapnum, } } - nvptx_exec (fn, mapnum, hostaddrs, devaddrs, dims, targ_mem_desc, - dp, aq->cuda_stream); + nvptx_exec (fn, mapnum, dims, targ_mem_desc, dp, aq->cuda_stream); if (mapnum > 0) GOMP_OFFLOAD_openacc_async_queue_callback (aq, cuda_free_argmem, block); -- cgit v1.1