aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dylan Baker <dylan@pnwbakers.com> 2021-02-16 16:51:51 -0800
committer GitHub <noreply@github.com> 2021-02-16 16:51:51 -0800
commit 7812ceec5fe6147bfe8a5a265b58db1282d2cabc (patch)
tree 037936fbe637fd1fe0b3df8d9a04091b289a6d5a
parent f3727c62c6c8e05c4b05f5f547f7b757f69b65ea (diff)
parent 3410465178b471d57d8591561a5c4f45015aa782 (diff)
download meson-7812ceec5fe6147bfe8a5a265b58db1282d2cabc.zip
meson-7812ceec5fe6147bfe8a5a265b58db1282d2cabc.tar.gz
meson-7812ceec5fe6147bfe8a5a265b58db1282d2cabc.tar.bz2
Merge pull request #8355 from obilaniu/cudafixes
CUDA Compiler Fixes
-rw-r--r-- docs/markdown/Builtin-options.md 1
-rw-r--r-- mesonbuild/compilers/compilers.py 4
-rw-r--r-- mesonbuild/compilers/cuda.py 450
-rw-r--r-- mesonbuild/modules/unstable_cuda.py 3
-rwxr-xr-x run_unittests.py 2
5 files changed, 419 insertions, 41 deletions
diff --git a/docs/markdown/Builtin-options.md b/docs/markdown/Builtin-options.md
index 2d7c01c..e3805ac 100644
--- a/docs/markdown/Builtin-options.md
+++ b/docs/markdown/Builtin-options.md
@@ -194,6 +194,7 @@ or compiler being used:
| cpp_thread_count | 4 | integer value ≥ 0 | Number of threads to use with emcc when using threads |
| cpp_winlibs | see below | free-form comma-separated list | Standard Windows libs to link against |
| fortran_std | none | [none, legacy, f95, f2003, f2008, f2018] | Fortran language standard to use |
+| cuda_ccbindir | | filesystem path | CUDA non-default toolchain directory to use (-ccbin) *(Added in 0.57.1)* |
The default values of `c_winlibs` and `cpp_winlibs` are in
compiler-specific argument forms, but the libraries are: kernel32,
diff --git a/mesonbuild/compilers/compilers.py b/mesonbuild/compilers/compilers.py
index 1872253..8171758 100644
--- a/mesonbuild/compilers/compilers.py
+++ b/mesonbuild/compilers/compilers.py
@@ -175,8 +175,8 @@ class CompileCheckMode(enum.Enum):
cuda_buildtype_args = {'plain': [],
- 'debug': [],
- 'debugoptimized': [],
+ 'debug': ['-g', '-G'],
+ 'debugoptimized': ['-g', '-lineinfo'],
'release': [],
'minsize': [],
'custom': [],
diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py
index 7fa3e4f..4efe0c6 100644
--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@@ -14,6 +14,7 @@
import enum
import os.path
+import string
import typing as T
from .. import coredata
@@ -45,7 +46,131 @@ class CudaCompiler(Compiler):
LINKER_PREFIX = '-Xlinker='
language = 'cuda'
- _universal_flags = {_Phase.COMPILER: ['-I', '-D', '-U', '-E'], _Phase.LINKER: ['-l', '-L']} # type: T.Dict[_Phase, T.List[str]]
+ # NVCC flags taking no arguments.
+ _FLAG_PASSTHRU_NOARGS = {
+ # NVCC --long-option, NVCC -short-option CUDA Toolkit 11.2.1 Reference
+ '--objdir-as-tempdir', '-objtemp', # 4.2.1.2
+ '--generate-dependency-targets', '-MP', # 4.2.1.12
+ '--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.1.14
+ '--link', # 4.2.2.1
+ '--lib', '-lib', # 4.2.2.2
+ '--device-link', '-dlink', # 4.2.2.3
+ '--device-c', '-dc', # 4.2.2.4
+ '--device-w', '-dw', # 4.2.2.5
+ '--cuda', '-cuda', # 4.2.2.6
+ '--compile', '-c', # 4.2.2.7
+ '--fatbin', '-fatbin', # 4.2.2.8
+ '--cubin', '-cubin', # 4.2.2.9
+ '--ptx', '-ptx', # 4.2.2.10
+ '--preprocess', '-E', # 4.2.2.11
+ '--generate-dependencies', '-M', # 4.2.2.12
+ '--generate-nonsystem-dependencies', '-MM', # 4.2.2.13
+ '--generate-dependencies-with-compile', '-MD', # 4.2.2.14
+ '--generate-nonsystem-dependencies-with-compile', '-MMD', # 4.2.2.15
+ '--run', # 4.2.2.16
+ '--profile', '-pg', # 4.2.3.1
+ '--debug', '-g', # 4.2.3.2
+ '--device-debug', '-G', # 4.2.3.3
+ '--extensible-whole-program', '-ewp', # 4.2.3.4
+ '--generate-line-info', '-lineinfo', # 4.2.3.5
+ '--dlink-time-opt', '-dlto', # 4.2.3.8
+ '--no-exceptions', '-noeh', # 4.2.3.11
+ '--shared', '-shared', # 4.2.3.12
+ '--no-host-device-initializer-list', '-nohdinitlist', # 4.2.3.15
+ '--expt-relaxed-constexpr', '-expt-relaxed-constexpr', # 4.2.3.16
+ '--extended-lambda', '-extended-lambda', # 4.2.3.17
+ '--expt-extended-lambda', '-expt-extended-lambda', # 4.2.3.18
+ '--m32', '-m32', # 4.2.3.20
+ '--m64', '-m64', # 4.2.3.21
+ '--forward-unknown-to-host-compiler', '-forward-unknown-to-host-compiler', # 4.2.5.1
+ '--forward-unknown-to-host-linker', '-forward-unknown-to-host-linker', # 4.2.5.2
+ '--dont-use-profile', '-noprof', # 4.2.5.3
+ '--dryrun', '-dryrun', # 4.2.5.5
+ '--verbose', '-v', # 4.2.5.6
+ '--keep', '-keep', # 4.2.5.7
+ '--save-temps', '-save-temps', # 4.2.5.9
+ '--clean-targets', '-clean', # 4.2.5.10
+ '--no-align-double', # 4.2.5.16
+ '--no-device-link', '-nodlink', # 4.2.5.17
+ '--allow-unsupported-compiler', '-allow-unsupported-compiler', # 4.2.5.18
+ '--use_fast_math', '-use_fast_math', # 4.2.7.7
+ '--extra-device-vectorization', '-extra-device-vectorization', # 4.2.7.12
+ '--compile-as-tools-patch', '-astoolspatch', # 4.2.7.13
+ '--keep-device-functions', '-keep-device-functions', # 4.2.7.14
+ '--disable-warnings', '-w', # 4.2.8.1
+ '--source-in-ptx', '-src-in-ptx', # 4.2.8.2
+ '--restrict', '-restrict', # 4.2.8.3
+ '--Wno-deprecated-gpu-targets', '-Wno-deprecated-gpu-targets', # 4.2.8.4
+ '--Wno-deprecated-declarations', '-Wno-deprecated-declarations', # 4.2.8.5
+ '--Wreorder', '-Wreorder', # 4.2.8.6
+ '--Wdefault-stream-launch', '-Wdefault-stream-launch', # 4.2.8.7
+ '--Wext-lambda-captures-this', '-Wext-lambda-captures-this', # 4.2.8.8
+ '--display-error-number', '-err-no', # 4.2.8.10
+ '--resource-usage', '-res-usage', # 4.2.8.14
+ '--help', '-h', # 4.2.8.15
+ '--version', '-V', # 4.2.8.16
+ '--list-gpu-code', '-code-ls', # 4.2.8.20
+ '--list-gpu-arch', '-arch-ls', # 4.2.8.21
+ }
+ # Dictionary of NVCC flags taking either one argument or a comma-separated list.
+ # Maps --long to -short options, because the short options are more GCC-like.
+ _FLAG_LONG2SHORT_WITHARGS = {
+ '--output-file': '-o', # 4.2.1.1
+ '--pre-include': '-include', # 4.2.1.3
+ '--library': '-l', # 4.2.1.4
+ '--define-macro': '-D', # 4.2.1.5
+ '--undefine-macro': '-U', # 4.2.1.6
+ '--include-path': '-I', # 4.2.1.7
+ '--system-include': '-isystem', # 4.2.1.8
+ '--library-path': '-L', # 4.2.1.9
+ '--output-directory': '-odir', # 4.2.1.10
+ '--dependency-output': '-MF', # 4.2.1.11
+ '--compiler-bindir': '-ccbin', # 4.2.1.13
+ '--archiver-binary': '-arbin', # 4.2.1.15
+ '--cudart': '-cudart', # 4.2.1.16
+ '--cudadevrt': '-cudadevrt', # 4.2.1.17
+ '--libdevice-directory': '-ldir', # 4.2.1.18
+ '--target-directory': '-target-dir', # 4.2.1.19
+ '--optimization-info': '-opt-info', # 4.2.3.6
+ '--optimize': '-O', # 4.2.3.7
+ '--ftemplate-backtrace-limit': '-ftemplate-backtrace-limit', # 4.2.3.9
+ '--ftemplate-depth': '-ftemplate-depth', # 4.2.3.10
+ '--x': '-x', # 4.2.3.13
+ '--std': '-std', # 4.2.3.14
+ '--machine': '-m', # 4.2.3.19
+ '--compiler-options': '-Xcompiler', # 4.2.4.1
+ '--linker-options': '-Xlinker', # 4.2.4.2
+ '--archive-options': '-Xarchive', # 4.2.4.3
+ '--ptxas-options': '-Xptxas', # 4.2.4.4
+ '--nvlink-options': '-Xnvlink', # 4.2.4.5
+ '--threads': '-t', # 4.2.5.4
+ '--keep-dir': '-keep-dir', # 4.2.5.8
+ '--run-args': '-run-args', # 4.2.5.11
+ '--input-drive-prefix': '-idp', # 4.2.5.12
+ '--dependency-drive-prefix': '-ddp', # 4.2.5.13
+ '--drive-prefix': '-dp', # 4.2.5.14
+ '--dependency-target-name': '-MT', # 4.2.5.15
+ '--default-stream': '-default-stream', # 4.2.6.1
+ '--gpu-architecture': '-arch', # 4.2.7.1
+ '--gpu-code': '-code', # 4.2.7.2
+ '--generate-code': '-gencode', # 4.2.7.3
+ '--relocatable-device-code': '-rdc', # 4.2.7.4
+ '--entries': '-e', # 4.2.7.5
+ '--maxrregcount': '-maxrregcount', # 4.2.7.6
+ '--ftz': '-ftz', # 4.2.7.8
+ '--prec-div': '-prec-div', # 4.2.7.9
+ '--prec-sqrt': '-prec-sqrt', # 4.2.7.10
+ '--fmad': '-fmad', # 4.2.7.11
+ '--Werror': '-Werror', # 4.2.8.9
+ '--diag-error': '-diag-error', # 4.2.8.11
+ '--diag-suppress': '-diag-suppress', # 4.2.8.12
+ '--diag-warn': '-diag-warn', # 4.2.8.13
+ '--options-file': '-optf', # 4.2.8.17
+ '--time': '-time', # 4.2.8.18
+ '--qpp-config': '-qpp-config', # 4.2.8.19
+ }
+ # Reverse map -short to --long options.
+ _FLAG_SHORT2LONG_WITHARGS = {v:k for k,v in _FLAG_LONG2SHORT_WITHARGS.items()}
def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice,
is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'],
@@ -60,23 +185,261 @@ class CudaCompiler(Compiler):
self.warn_args = {level: self._to_host_flags(flags) for level, flags in host_compiler.warn_args.items()}
@classmethod
- def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
- return [cls._to_host_flag(f, phase=phase) for f in flags]
+ def _shield_nvcc_list_arg(cls, arg: str, listmode: bool=True) -> str:
+ """
+ Shield an argument against both splitting by NVCC's list-argument
+ parse logic, and interpretation by any shell.
+
+ NVCC seems to consider every comma , that is neither escaped by \ nor inside
+ a double-quoted string a split-point. Single-quotes do not provide protection
+ against splitting; in fact, after splitting they are \-escaped. Unfortunately,
+ double-quotes don't protect against shell expansion. What follows is a
+ complex dance to accommodate everybody.
+ """
+
+ SQ = "'"
+ DQ = '"'
+ CM = ","
+ BS = "\\"
+ DQSQ = DQ+SQ+DQ
+ quotable = set(string.whitespace+'"$`\\')
+
+ if CM not in arg or not listmode:
+ if SQ not in arg:
+ # If any of the special characters "$`\ or whitespace are present, single-quote.
+ # Otherwise return bare.
+ if set(arg).intersection(quotable):
+ return SQ+arg+SQ
+ else:
+ return arg # Easy case: no splits, no quoting.
+ else:
+ # There are single quotes. Double-quote them, and single-quote the
+ # strings between them.
+ l = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
+ l = sum([[s, DQSQ] for s in l][:-1], []) # Interleave l with DQSQs
+
+ # The list l now has the structure of shielded strings interleaved
+ # with double-quoted single-quotes.
+ #
+ # Plain concatenation would result in the tripling of the length of
+ # a string made up only of single quotes. See if we can merge some
+ # DQSQs together first.
+ def isdqsq(x:str) -> bool:
+ return x.startswith(SQ) and x.endswith(SQ) and x[1:-1].strip(SQ) == ''
+ for i in range(1, len(l)-2, 2):
+ if isdqsq(l[i]) and l[i+1] == '' and isdqsq(l[i+2]):
+ l[i+2] = l[i][:-1]+l[i+2][1:]
+ l[i] = ''
+
+ # With DQSQs merged, simply concatenate everything together and return.
+ return ''.join(l)
+ else:
+ # A comma is present, and list mode was active.
+ # We apply (what we guess is) the (primitive) NVCC splitting rule:
+ l = ['']
+ instring = False
+ argit = iter(arg)
+ for c in argit:
+ if c == CM and not instring:
+ l.append('')
+ elif c == DQ:
+ l[-1] += c
+ instring = not instring
+ elif c == BS:
+ try:
+ l[-1] += next(argit)
+ except StopIteration:
+ break
+ else:
+ l[-1] += c
+
+ # Shield individual strings, without listmode, then return them with
+ # escaped commas between them.
+ l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
+ return '\,'.join(l)
@classmethod
- def _to_host_flag(cls, flag: str, phase: _Phase) -> str:
- if not flag[0] in ['-', '/'] or flag[:2] in cls._universal_flags[phase]:
- return flag
+ def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
+ """
+ Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.
+
+ NVCC's "short" flags have broad similarities to the GCC standard, but have
+ gratuitous, irritating differences.
+ """
+
+ xflags = []
+ flagit = iter(flags)
+
+ for flag in flagit:
+ # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
+ # specifies that NVCC does not parse the standard flags as GCC does. It has
+ # its own strategy, to wit:
+ #
+ # nvcc recognizes three types of command options: boolean options, single
+ # value options, and list options.
+ #
+ # Boolean options do not have an argument; they are either specified on a
+ # command line or not. Single value options must be specified at most once,
+ # and list options may be repeated. Examples of each of these option types
+ # are, respectively: --verbose (switch to verbose mode), --output-file
+ # (specify output file), and --include-path (specify include path).
+ #
+ # Single value options and list options must have arguments, which must
+ # follow the name of the option itself by either one or more spaces or an
+ # equals character. When a one-character short name such as -I, -l, and -L
+ # is used, the value of the option may also immediately follow the option
+ # itself without being separated by spaces or an equal character. The
+ # individual values of list options may be separated by commas in a single
+ # instance of the option, or the option may be repeated, or any
+ # combination of these two cases.
+ #
+ # One strange consequence of this choice is that directory and filenames that
+ # contain commas (',') cannot be passed to NVCC (at least, not as easily as
+ # in GCC). Another strange consequence is that it is legal to supply flags
+ # such as
+ #
+ # -lpthread,rt,dl,util
+ # -l pthread,rt,dl,util
+ # -l=pthread,rt,dl,util
+ #
+ # and each of the above alternatives is equivalent to GCC-speak
+ #
+ # -lpthread -lrt -ldl -lutil
+ # -l pthread -l rt -l dl -l util
+ # -l=pthread -l=rt -l=dl -l=util
+ #
+ # *With the exception of commas in the name*, GCC-speak for these list flags
+ # is a strict subset of NVCC-speak, so we passthrough those flags.
+ #
+ # The -D macro-define flag is documented as somehow shielding commas from
+ # splitting a definition. Balanced parentheses, braces and single-quotes
+ # around the comma are not sufficient, but balanced double-quotes are. The
+ # shielding appears to work with -l, -I, -L flags as well, for instance.
+ #
+ # Since our goal is to replicate GCC-speak as much as possible, we check for
+ # commas in all list-arguments and shield them with double-quotes. We make
+ # an exception for -D (where this would be value-changing) and -U (because
+ # it isn't possible to define a macro with a comma in the name).
+
+ if flag in cls._FLAG_PASSTHRU_NOARGS:
+ xflags.append(flag)
+ continue
+
+
+ # Handle breakup of flag-values into a flag-part and value-part.
+ if flag[:1] not in '-/':
+ # This is not a flag. It's probably a file input. Pass it through.
+ xflags.append(flag)
+ continue
+ elif flag[:1] == '/':
+ # This is ambiguously either an MSVC-style /switch or an absolute path
+ # to a file. For some magical reason the following works acceptably in
+ # both cases.
+ wrap = '"' if ',' in flag else ''
+ xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
+ continue
+ elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte':
+ # This is a single-letter short option. These options (with the
+ # exception of -o) are allowed to receive their argument with neither
+ # space nor = sign before them. Detect and separate them in that event.
+ if flag[2:3] == '': # -I something
+ try:
+ val = next(flagit)
+ except StopIteration:
+ pass
+ elif flag[2:3] == '=': # -I=something
+ val = flag[3:]
+ else: # -Isomething
+ val = flag[2:]
+ flag = flag[:2] # -I
+ elif flag in cls._FLAG_LONG2SHORT_WITHARGS or \
+ flag in cls._FLAG_SHORT2LONG_WITHARGS:
+ # This is either -o or a multi-letter flag, and it is receiving its
+ # value isolated.
+ try:
+ val = next(flagit) # -o something
+ except StopIteration:
+ pass
+ elif flag.split('=',1)[0] in cls._FLAG_LONG2SHORT_WITHARGS or \
+ flag.split('=',1)[0] in cls._FLAG_SHORT2LONG_WITHARGS:
+ # This is either -o or a multi-letter flag, and it is receiving its
+ # value after an = sign.
+ flag, val = flag.split('=',1) # -o=something
+ else:
+ # This is a flag, and it's foreign to NVCC.
+ #
+ # We do not know whether this GCC-speak flag takes an isolated
+ # argument. Assuming it does not (the vast majority indeed don't),
+ # wrap this argument in an -Xcompiler flag and send it down to NVCC.
+ if flag == '-ffast-math':
+ xflags.append('-use_fast_math')
+ xflags.append('-Xcompiler='+flag)
+ elif flag == '-fno-fast-math':
+ xflags.append('-ftz=false')
+ xflags.append('-prec-div=true')
+ xflags.append('-prec-sqrt=true')
+ xflags.append('-Xcompiler='+flag)
+ elif flag == '-freciprocal-math':
+ xflags.append('-prec-div=false')
+ xflags.append('-Xcompiler='+flag)
+ elif flag == '-fno-reciprocal-math':
+ xflags.append('-prec-div=true')
+ xflags.append('-Xcompiler='+flag)
+ else:
+ xflags.append('-Xcompiler='+cls._shield_nvcc_list_arg(flag))
+ # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
+ continue
+
+
+ assert val is not None # NOTE: val is never reset per flag, so a missing argument leaves it unbound or stale instead of tripping this.
+
+
+ # Take care of the various NVCC-supported flags that need special handling.
+ flag = cls._FLAG_LONG2SHORT_WITHARGS.get(flag,flag)
+
+ if flag in {'-include','-isystem','-I','-L','-l'}:
+ # These flags are known to GCC, but list-valued in NVCC. They potentially
+ # require double-quoting to prevent NVCC interpreting the flags as lists
+ # when GCC would not have done so.
+ #
+ # We avoid doing this quoting for -D to avoid redefining macros and for
+ # -U because it isn't possible to define a macro with a comma in the name.
+ # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
+ # in NVCC-speak, albeit unportable).
+ if len(flag) == 2:
+ xflags.append(flag+cls._shield_nvcc_list_arg(val))
+ else:
+ xflags.append(flag)
+ xflags.append(cls._shield_nvcc_list_arg(val))
+ elif flag == '-O':
+ # Handle optimization levels GCC knows about that NVCC does not.
+ if val == 'fast':
+ xflags.append('-O3')
+ xflags.append('-use_fast_math')
+ xflags.append('-Xcompiler')
+ xflags.append(flag+val)
+ elif val in {'s', 'g', 'z'}:
+ xflags.append('-Xcompiler')
+ xflags.append(flag+val)
+ else:
+ xflags.append(flag+val)
+ elif flag in {'-D', '-U', '-m', '-t'}:
+ xflags.append(flag+val) # For style, keep glued.
+ elif flag in {'-std'}:
+ xflags.append(flag+'='+val) # For style, keep glued.
+ else:
+ xflags.append(flag)
+ xflags.append(val)
- return '-X{}={}'.format(phase.value, flag)
+ return xflags
def needs_static_linker(self) -> bool:
return False
def thread_link_flags(self, environment: 'Environment') -> T.List[str]:
- return self._to_host_flags(self.host_compiler.thread_link_flags(environment))
+ return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER)
- def sanity_check(self, work_dir: str, environment: 'Environment') -> None:
+ def sanity_check(self, work_dir: str, env: 'Environment') -> None:
mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
@@ -120,7 +483,18 @@ class CudaCompiler(Compiler):
# environment set up properly. Of course, this only works for native
# builds; For cross builds we must still use the exe_wrapper (if any).
self.detected_cc = ''
- flags = ['-w', '-cudart', 'static', source_name]
+ flags = []
+
+ # Disable warnings, compile with statically-linked runtime for minimum
+ # reliance on the system.
+ flags += ['-w', '-cudart', 'static', source_name]
+
+ # Use the -ccbin option, if available, even during sanity checking.
+ # Otherwise, on systems where CUDA does not support the default compiler,
+ # NVCC becomes unusable.
+ flags += self.get_ccbin_args(env.coredata.options)
+
+ # If cross-compiling, we can't run the sanity check, only compile it.
if self.is_cross and self.exe_wrapper is None:
# Linking cross built apps is painful. You can't really
# tell if you should use -nostdlib or not and for example
@@ -200,10 +574,14 @@ class CudaCompiler(Compiler):
def get_options(self) -> 'KeyedOptionDictType':
opts = super().get_options()
- key = OptionKey('std', machine=self.for_machine, lang=self.language)
- opts.update({key: coredata.UserComboOption('C++ language standard to use with cuda',
- ['none', 'c++03', 'c++11', 'c++14'],
- 'none')})
+ std_key = OptionKey('std', machine=self.for_machine, lang=self.language)
+ ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
+ opts.update({
+ std_key: coredata.UserComboOption('C++ language standard to use with CUDA',
+ ['none', 'c++03', 'c++11', 'c++14', 'c++17'], 'none'),
+ ccbindir_key: coredata.UserStringOption('CUDA non-default toolchain directory to use (-ccbin)',
+ ''),
+ })
return opts
def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
@@ -211,7 +589,7 @@ class CudaCompiler(Compiler):
return OptionOverrideProxy(overrides, self.host_compiler.get_options())
def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
- args = []
+ args = self.get_ccbin_args(options)
# On Windows, the version of the C++ standard used by nvcc is dictated by
# the combination of CUDA version and MSVC version; the --std= is thus ignored
# and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
@@ -223,27 +601,17 @@ class CudaCompiler(Compiler):
return args + self._to_host_flags(self.host_compiler.get_option_compile_args(self._to_host_compiler_options(options)))
- @classmethod
- def _cook_link_args(cls, args: T.List[str]) -> T.List[str]:
- # Prepare link args for nvcc
- cooked = [] # type: T.List[str]
- for arg in args:
- if arg.startswith('-Wl,'): # strip GNU-style -Wl prefix
- arg = arg.replace('-Wl,', '', 1)
- arg = arg.replace(' ', '\\') # espace whitespace
- cooked.append(arg)
- return cls._to_host_flags(cooked, _Phase.LINKER)
-
def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
- return self._cook_link_args(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)))
+ args = self.get_ccbin_args(options)
+ return args + self._to_host_flags(self.host_compiler.get_option_link_args(self._to_host_compiler_options(options)), _Phase.LINKER)
def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
suffix: str, soversion: str,
darwin_versions: T.Tuple[str, str],
is_shared_module: bool) -> T.List[str]:
- return self._cook_link_args(self.host_compiler.get_soname_args(
+ return self._to_host_flags(self.host_compiler.get_soname_args(
env, prefix, shlib_name, suffix, soversion, darwin_versions,
- is_shared_module))
+ is_shared_module), _Phase.LINKER)
def get_compile_only_args(self) -> T.List[str]:
return ['-c']
@@ -275,26 +643,26 @@ class CudaCompiler(Compiler):
def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
if path == '':
path = '.'
- return ['-I' + path]
+ return ['-isystem=' + path] if is_system else ['-I' + path]
def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
return self._to_host_flags(self.host_compiler.get_compile_debugfile_args(rel_obj, pch))
def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
- return self._cook_link_args(self.host_compiler.get_link_debugfile_args(targetfile))
+ return self._to_host_flags(self.host_compiler.get_link_debugfile_args(targetfile), _Phase.LINKER)
def get_depfile_suffix(self) -> str:
return 'd'
def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
- return self._cook_link_args(self.host_compiler.get_buildtype_linker_args(buildtype))
+ return self._to_host_flags(self.host_compiler.get_buildtype_linker_args(buildtype), _Phase.LINKER)
def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
rpath_paths: str, build_rpath: str,
install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
(rpath_args, rpath_dirs_to_remove) = self.host_compiler.build_rpath_args(
env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
- return (self._cook_link_args(rpath_args), rpath_dirs_to_remove)
+ return (self._to_host_flags(rpath_args, _Phase.LINKER), rpath_dirs_to_remove)
def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
return args
@@ -310,7 +678,7 @@ class CudaCompiler(Compiler):
return ['-o', target]
def get_std_exe_link_args(self) -> T.List[str]:
- return self._cook_link_args(self.host_compiler.get_std_exe_link_args())
+ return self._to_host_flags(self.host_compiler.get_std_exe_link_args(), _Phase.LINKER)
def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]:
@@ -326,13 +694,21 @@ class CudaCompiler(Compiler):
host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
if any(arg in ['/MDd', '/MD', '/MTd'] for arg in host_crt_compile_args):
host_link_arg_overrides += ['/NODEFAULTLIB:LIBCMT.lib']
- return self._cook_link_args(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype))
+ return self._to_host_flags(host_link_arg_overrides + self.host_compiler.get_crt_link_args(crt_val, buildtype), _Phase.LINKER)
def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
- return self._cook_link_args(super().get_target_link_args(target))
+ return self._to_host_flags(super().get_target_link_args(target), _Phase.LINKER)
def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
return self._to_host_flags(super().get_dependency_compile_args(dep))
def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
- return self._cook_link_args(super().get_dependency_link_args(dep))
+ return self._to_host_flags(super().get_dependency_link_args(dep), _Phase.LINKER)
+
+ def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
+ key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
+ ccbindir = options[key].value
+ if isinstance(ccbindir, str) and ccbindir != '':
+ return [self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)]
+ else:
+ return []
diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py
index 0a5f031..33df0bd 100644
--- a/mesonbuild/modules/unstable_cuda.py
+++ b/mesonbuild/modules/unstable_cuda.py
@@ -43,7 +43,8 @@ class CudaModule(ExtensionModule):
cuda_version = args[0]
driver_version_table = [
- {'cuda_version': '>=11.2.0', 'windows': '460.89', 'linux': '460.27.04'},
+ {'cuda_version': '>=11.2.1', 'windows': '461.09', 'linux': '460.32.03'},
+ {'cuda_version': '>=11.2.0', 'windows': '460.82', 'linux': '460.27.03'},
{'cuda_version': '>=11.1.1', 'windows': '456.81', 'linux': '455.32'},
{'cuda_version': '>=11.1.0', 'windows': '456.38', 'linux': '455.23'},
{'cuda_version': '>=11.0.3', 'windows': '451.82', 'linux': '450.51.06'},
diff --git a/run_unittests.py b/run_unittests.py
index 8083342..0bd6d4b 100755
--- a/run_unittests.py
+++ b/run_unittests.py
@@ -3769,7 +3769,7 @@ class AllPlatformTests(BasePlatformTests):
env = get_fake_env()
for l in ['cpp', 'cs', 'd', 'java', 'cuda', 'fortran', 'objc', 'objcpp', 'rust']:
try:
- comp = getattr(env, f'detect_{l}_compiler')(MachineChoice.HOST)
+ comp = env.detect_compiler_for(l, MachineChoice.HOST)
with tempfile.TemporaryDirectory() as d:
comp.sanity_check(d, env)
langs.append(l)