aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom de Vries <tom@codesourcery.com> 2018-04-26 13:27:04 +0000
committer Tom de Vries <vries@gcc.gnu.org> 2018-04-26 13:27:04 +0000
commit df36a3d3befd76f8db5cc4397f00208539e49fbf (patch)
tree 08e3aea28bfda1f840bec42d2cb0130ae65d2f98
parent a874808c6143032cec029a1a8a421d6b7cdf05e8 (diff)
download gcc-df36a3d3befd76f8db5cc4397f00208539e49fbf.zip
gcc-df36a3d3befd76f8db5cc4397f00208539e49fbf.tar.gz
gcc-df36a3d3befd76f8db5cc4397f00208539e49fbf.tar.bz2
[nvptx, libgomp] Add GOMP_NVPTX_JIT=-O[0-4] in nvptx libgomp plugin
2018-04-26  Tom de Vries  <tom@codesourcery.com>

	PR libgomp/84020
	* plugin/cuda/cuda.h (CUjit_option): Add CU_JIT_OPTIMIZATION_LEVEL.
	* plugin/plugin-nvptx.c (_GNU_SOURCE): Define.
	(process_GOMP_NVPTX_JIT): New function.
	(link_ptx): Use process_GOMP_NVPTX_JIT.

From-SVN: r259678
-rw-r--r-- libgomp/ChangeLog 8
-rw-r--r-- libgomp/plugin/cuda/cuda.h 1
-rw-r--r-- libgomp/plugin/plugin-nvptx.c 56
3 files changed, 62 insertions, 3 deletions
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index e498a08..0a612a3 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,11 @@
+2018-04-26 Tom de Vries <tom@codesourcery.com>
+
+ PR libgomp/84020
+ * plugin/cuda/cuda.h (CUjit_option): Add CU_JIT_OPTIMIZATION_LEVEL.
+ * plugin/plugin-nvptx.c (_GNU_SOURCE): Define.
+ (process_GOMP_NVPTX_JIT): New function.
+ (link_ptx): Use process_GOMP_NVPTX_JIT.
+
2018-04-26 Richard Biener <rguenther@suse.de>
Tom de Vries <tom@codesourcery.com>
diff --git a/libgomp/plugin/cuda/cuda.h b/libgomp/plugin/cuda/cuda.h
index edad4c6..4799825 100644
--- a/libgomp/plugin/cuda/cuda.h
+++ b/libgomp/plugin/cuda/cuda.h
@@ -88,6 +88,7 @@ typedef enum {
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,
CU_JIT_ERROR_LOG_BUFFER = 5,
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,
+ CU_JIT_OPTIMIZATION_LEVEL = 7,
CU_JIT_LOG_VERBOSE = 12
} CUjit_option;
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index 9ae6095..2b875ae 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -31,6 +31,7 @@
is not clear as to what that state might be. Or how one might
propagate it from one thread to another. */
+#define _GNU_SOURCE
#include "openacc.h"
#include "config.h"
#include "libgomp-plugin.h"
@@ -138,6 +139,8 @@ init_cuda_lib (void)
# define init_cuda_lib() true
#endif
+#include "secure_getenv.h"
+
/* Convenience macros for the frequently used CUDA library call and
error handling sequence as well as CUDA library calls that
do the error checking themselves or don't do it at all. */
@@ -876,12 +879,42 @@ notify_var (const char *var_name, const char *env_var)
GOMP_PLUGIN_debug (0, "%s: '%s'\n", var_name, env_var);
}
+static void /* Parse the GOMP_NVPTX_JIT environment variable into *GOMP_NVPTX_O.  */
+process_GOMP_NVPTX_JIT (intptr_t *gomp_nvptx_o) /* *GOMP_NVPTX_O is written only when a "-O<digit>" token is accepted.  */
+{
+ const char *var_name = "GOMP_NVPTX_JIT";
+ const char *env_var = secure_getenv (var_name); /* May be NULL; checked below.  */
+ notify_var (var_name, env_var); /* Called before the NULL check, so it may receive NULL.  */
+
+ if (env_var == NULL)
+ return; /* Nothing to parse; *GOMP_NVPTX_O is left untouched.  */
+
+ const char *c = env_var; /* Cursor into the variable's value.  */
+ while (*c != '\0') /* Each iteration consumes one space-separated token.  */
+ {
+ while (*c == ' ') /* Skip blanks: only ' ' counts as a separator, not tabs.  */
+ c++;
+
+ if (c[0] == '-' && c[1] == 'O' /* Accept exactly "-O" ...  */
+ && '0' <= c[2] && c[2] <= '4' /* ... followed by one digit in the range 0..4 ...  */
+ && (c[3] == '\0' || c[3] == ' ')) /* ... ended by a space or the end of the string.  */
+ {
+ *gomp_nvptx_o = c[2] - '0'; /* Digit to integer; a later valid token overwrites an earlier one.  */
+ c += 3; /* Step past the three characters of the token.  */
+ continue;
+ }
+
+ GOMP_PLUGIN_error ("Error parsing %s", var_name); /* NOTE(review): also reached when only spaces remain (e.g. "-O2 "), because the outer test runs before the skip.  */
+ break; /* Stop parsing; a level stored by an earlier token is kept.  */
+ }
+}
+
static bool
link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
unsigned num_objs)
{
- CUjit_option opts[6];
- void *optvals[6];
+ CUjit_option opts[7];
+ void *optvals[7];
float elapsed = 0.0;
char elog[1024];
char ilog[16384];
@@ -908,7 +941,24 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs,
opts[5] = CU_JIT_LOG_VERBOSE;
optvals[5] = (void *) 1;
- CUDA_CALL (cuLinkCreate, 6, opts, optvals, &linkstate);
+ static intptr_t gomp_nvptx_o = -1;
+
+ static bool init_done = false;
+ if (!init_done)
+ {
+ process_GOMP_NVPTX_JIT (&gomp_nvptx_o);
+ init_done = true;
+ }
+
+ int nopts = 6;
+ if (gomp_nvptx_o != -1)
+ {
+ opts[nopts] = CU_JIT_OPTIMIZATION_LEVEL;
+ optvals[nopts] = (void *) gomp_nvptx_o;
+ nopts++;
+ }
+
+ CUDA_CALL (cuLinkCreate, nopts, opts, optvals, &linkstate);
for (; num_objs--; ptx_objs++)
{