[libgomp, nvptx] Report launch dimensions in GOMP_OFFLOAD_run

With this patch, when GOMP_DEBUG=1 is set and a kernel is launched in
GOMP_OFFLOAD_run (used by the OpenMP implementation), the kernel launch
dimensions are reported:
...
GOMP_OFFLOAD_run: kernel main$_omp_fn$0: \
  launch [(teams: 1), 1, 1] [(lanes: 32), (threads: 1), 1]
...

Built on x86_64-linux with nvptx accelerator, tested libgomp.

libgomp/ChangeLog:

2020-10-08  Tom de Vries  <tdevries@suse.de>

	PR libgomp/81802
	* plugin/plugin-nvptx.c (GOMP_OFFLOAD_run): Report launch dimensions.
author: Tom de Vries <tdevries@suse.de> 2020-10-08 08:22:39 +0200
committer: Tom de Vries <tdevries@suse.de> 2020-10-08 11:03:29 +0200
commit: 7345ef6c2a197d0a2581c67838b7ba1650dfad30 (patch)
tree: 567612353abd9396dafae46f8565935709794b88
parent: c1c62aec6751678e958ab5c61b2d903a09d7efd9 (diff)
download: gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.zip
gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.tar.gz
gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.tar.bz2
1 file changed, 8 insertions, 1 deletion
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index a63dd1a..11d4cee 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1891,7 +1891,11 @@ nvptx_stacks_free (void *p, int num)
 void
 GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
 {
-  CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn;
+  struct targ_fn_descriptor *tgt_fn_desc
+    = (struct targ_fn_descriptor *) tgt_fn;
+  CUfunction function = tgt_fn_desc->fn;
+  const struct targ_fn_launch *launch = tgt_fn_desc->launch;
+  const char *fn_name = launch->fn;
   CUresult r;
   struct ptx_device *ptx_dev = ptx_devices[ord];
   const char *maybe_abort_msg = "(perhaps abort was called)";
@@ -1926,6 +1930,9 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
     CU_LAUNCH_PARAM_BUFFER_SIZE, &fn_args_size,
     CU_LAUNCH_PARAM_END
   };
+  GOMP_PLUGIN_debug (0, "  %s: kernel %s: launch"
+		     " [(teams: %u), 1, 1] [(lanes: 32), (threads: %u), 1]\n",
+		     __FUNCTION__, fn_name, teams, threads);
   r = CUDA_CALL_NOCHECK (cuLaunchKernel, function, teams, 1, 1,
 			 32, threads, 1, 0, NULL, NULL, config);
   if (r != CUDA_SUCCESS)
author	Tom de Vries <tdevries@suse.de>	2020-10-08 08:22:39 +0200
committer	Tom de Vries <tdevries@suse.de>	2020-10-08 11:03:29 +0200
commit	7345ef6c2a197d0a2581c67838b7ba1650dfad30 (patch)
tree	567612353abd9396dafae46f8565935709794b88
parent	c1c62aec6751678e958ab5c61b2d903a09d7efd9 (diff)
download	gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.zip gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.tar.gz gcc-7345ef6c2a197d0a2581c67838b7ba1650dfad30.tar.bz2