aboutsummaryrefslogtreecommitdiff
path: root/libgomp/plugin
diff options
context:
space:
mode:
author Thomas Schwinge <tschwinge@baylibre.com> 2024-05-31 17:04:39 +0200
committer Thomas Schwinge <tschwinge@baylibre.com> 2024-06-06 13:41:47 +0200
commit 0d25989d60d15866ef4737d66e02432f50717255 (patch)
tree cbe4e5dfd487bd3b9745958af2751c9580f36ec5 /libgomp/plugin
parent a29c5852a606588175d11844db84da0881227100 (diff)
download gcc-0d25989d60d15866ef4737d66e02432f50717255.zip
gcc-0d25989d60d15866ef4737d66e02432f50717255.tar.gz
gcc-0d25989d60d15866ef4737d66e02432f50717255.tar.bz2
nvptx offloading: 'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable [PR97384, PR105274]
... as a means to manually set the "native" GPU thread stack size.

        PR libgomp/97384
        PR libgomp/105274
        libgomp/
        * plugin/cuda-lib.def (cuCtxSetLimit): Add.
        * plugin/plugin-nvptx.c (nvptx_open_device): Handle
        'GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE' environment variable.
Diffstat (limited to 'libgomp/plugin')
-rw-r--r-- libgomp/plugin/cuda-lib.def 1
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 45
2 files changed, 46 insertions, 0 deletions
diff --git a/libgomp/plugin/cuda-lib.def b/libgomp/plugin/cuda-lib.def
index 007c6e0..9255c1c 100644
--- a/libgomp/plugin/cuda-lib.def
+++ b/libgomp/plugin/cuda-lib.def
@@ -4,6 +4,7 @@ CUDA_ONE_CALL (cuCtxGetCurrent)
CUDA_ONE_CALL (cuCtxGetDevice)
CUDA_ONE_CALL (cuCtxPopCurrent)
CUDA_ONE_CALL (cuCtxPushCurrent)
+CUDA_ONE_CALL (cuCtxSetLimit)
CUDA_ONE_CALL (cuCtxSynchronize)
CUDA_ONE_CALL (cuDeviceGet)
CUDA_ONE_CALL (cuDeviceGetAttribute)
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 0f3a3be..99cbcb6 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -150,6 +150,8 @@ init_cuda_lib (void)
#include "secure_getenv.h"
+static void notify_var (const char *, const char *);
+
#undef MIN
#undef MAX
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
@@ -341,6 +343,9 @@ struct ptx_device
static struct ptx_device **ptx_devices;
+/* "Native" GPU thread stack size. */
+static unsigned native_gpu_thread_stack_size = 0;
+
/* OpenMP kernels reserve a small amount of ".shared" space for use by
omp_alloc. The size is configured using GOMP_NVPTX_LOWLAT_POOL, but the
default is set here. */
@@ -555,6 +560,46 @@ nvptx_open_device (int n)
ptx_dev->free_blocks = NULL;
pthread_mutex_init (&ptx_dev->free_blocks_lock, NULL);
+ /* "Native" GPU thread stack size. */
+ {
+ /* This is intentionally undocumented, until we work out a proper, common
+ scheme (as much as makes sense) between all offload plugins as well
+ as between nvptx offloading use of "native" stacks for OpenACC vs.
+ OpenMP "soft stacks" vs. OpenMP '-msoft-stack-reserve-local=[...]'.
+
+ GCN offloading has a 'GCN_STACK_SIZE' environment variable (without
+ 'GOMP_' prefix): documented; presumably used for all things OpenACC and
+ OpenMP? Based on GCN command-line option '-mstack-size=[...]' (marked
+ "obsolete"), that one may be set via a GCN 'mkoffload'-synthesized
+ 'constructor' function. */
+ const char *var_name = "GOMP_NVPTX_NATIVE_GPU_THREAD_STACK_SIZE";
+ const char *env_var = secure_getenv (var_name);
+ notify_var (var_name, env_var);
+
+ if (env_var != NULL)
+ {
+ char *endptr;
+ unsigned long val = strtoul (env_var, &endptr, 10);
+ if (endptr == NULL || *endptr != '\0'
+ || errno == ERANGE || errno == EINVAL
+ || val > UINT_MAX)
+ GOMP_PLUGIN_error ("Error parsing %s", var_name);
+ else
+ native_gpu_thread_stack_size = val;
+ }
+ }
+ if (native_gpu_thread_stack_size == 0)
+ ; /* Zero means use default. */
+ else
+ {
+ GOMP_PLUGIN_debug (0, "Setting \"native\" GPU thread stack size"
+ " ('CU_LIMIT_STACK_SIZE') to %u bytes\n",
+ native_gpu_thread_stack_size);
+ CUDA_CALL (cuCtxSetLimit,
+ CU_LIMIT_STACK_SIZE, (size_t) native_gpu_thread_stack_size);
+ }
+
+ /* OpenMP "soft stacks". */
ptx_dev->omp_stacks.ptr = 0;
ptx_dev->omp_stacks.size = 0;
pthread_mutex_init (&ptx_dev->omp_stacks.lock, NULL);