aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/markdown/Cuda-module.md10
-rw-r--r--docs/markdown/Reference-manual.md3
-rw-r--r--mesonbuild/backend/vs2010backend.py18
-rw-r--r--mesonbuild/backend/xcodebackend.py2
-rw-r--r--mesonbuild/compilers/cuda.py152
-rw-r--r--mesonbuild/modules/unstable_cuda.py20
6 files changed, 124 insertions, 81 deletions
diff --git a/docs/markdown/Cuda-module.md b/docs/markdown/Cuda-module.md
index caa1756..f161eac 100644
--- a/docs/markdown/Cuda-module.md
+++ b/docs/markdown/Cuda-module.md
@@ -71,6 +71,14 @@ mixed with architecture names or compute capabilities. Their interpretation is:
| `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. |
| `'Auto'` | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. |
+As a special case, when `nvcc_arch_flags()` is invoked with
+
+- an NVCC `compiler` object `nvcc`,
+- `'Auto'` mode and
+- no `detected:` keyword,
+
+Meson uses `nvcc`'s architecture auto-detection results.
+
The supported architecture names and their corresponding compute capabilities
are:
@@ -85,7 +93,7 @@ are:
| `'Pascal'` | 6.0, 6.1 |
| `'Pascal+Tegra'` | 6.2 |
| `'Volta'` | 7.0 |
-| `'Volta+Tegra'` | 7.2 |
+| `'Xavier'` | 7.2 |
| `'Turing'` | 7.5 |
diff --git a/docs/markdown/Reference-manual.md b/docs/markdown/Reference-manual.md
index adaffe7..6c3b3ad 100644
--- a/docs/markdown/Reference-manual.md
+++ b/docs/markdown/Reference-manual.md
@@ -2256,7 +2256,8 @@ sample piece of code with [`compiler.run()`](#compiler-object) or
[`run_command()`](#run_command). It has the following methods:
- `compiled()` if true, the compilation succeeded, if false it did not
- and the other methods return unspecified data
+ and the other methods return unspecified data. This is only available
+ for `compiler.run()` results.
- `returncode()` the return code of executing the compiled binary
- `stderr()` the standard error produced when the command was run
- `stdout()` the standard out produced when the command was run
diff --git a/mesonbuild/backend/vs2010backend.py b/mesonbuild/backend/vs2010backend.py
index 8827f20..2a50dd2 100644
--- a/mesonbuild/backend/vs2010backend.py
+++ b/mesonbuild/backend/vs2010backend.py
@@ -547,6 +547,10 @@ class Vs2010Backend(backends.Backend):
extra_paths=extra_paths,
capture=ofilenames[0] if target.capture else None)
wrapper_cmd = self.environment.get_build_command() + ['--internal', 'exe', exe_data]
+ if target.build_always_stale:
+ # Use a nonexistent file to always consider the target out-of-date.
+ ofilenames += [self.nonexistent_file(os.path.join(self.environment.get_scratch_dir(),
+ 'outofdate.file'))]
self.add_custom_build(root, 'custom_target', ' '.join(self.quote_arguments(wrapper_cmd)),
deps=[exe_data] + srcs + depend_files, outputs=ofilenames)
ET.SubElement(root, 'Import', Project=r'$(VCTargetsPath)\Microsoft.Cpp.targets')
@@ -1423,14 +1427,20 @@ if %%errorlevel%% neq 0 goto :VCEnd'''
ET.SubElement(custombuild, 'Command').text = cmd_templ % command
if not outputs:
# Use a nonexistent file to always consider the target out-of-date.
- output_file = os.path.join(self.environment.get_scratch_dir(), 'outofdate.file')
- while os.path.exists(output_file):
- output_file += '0'
- outputs = [output_file]
+ outputs = [self.nonexistent_file(os.path.join(self.environment.get_scratch_dir(),
+ 'outofdate.file'))]
ET.SubElement(custombuild, 'Outputs').text = ';'.join(outputs)
if deps:
ET.SubElement(custombuild, 'AdditionalInputs').text = ';'.join(deps)
+ @staticmethod
+ def nonexistent_file(prefix):
+ i = 0
+ file = prefix
+ while os.path.exists(file):
+ file = '%s%d' % (prefix, i)
+ return file
+
def generate_debug_information(self, link):
# valid values for vs2015 is 'false', 'true', 'DebugFastLink'
ET.SubElement(link, 'GenerateDebugInformation').text = 'true'
diff --git a/mesonbuild/backend/xcodebackend.py b/mesonbuild/backend/xcodebackend.py
index a550d91..990b824 100644
--- a/mesonbuild/backend/xcodebackend.py
+++ b/mesonbuild/backend/xcodebackend.py
@@ -733,6 +733,8 @@ class XCodeBackend(backends.Backend):
else:
product_name = target.get_basename()
ldargs += target.link_args
+ for dep in target.get_external_deps():
+ ldargs += dep.get_link_args()
ldstr = ' '.join(ldargs)
valid = self.buildconfmap[target_name][buildtype]
langargs = {}
diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py
index 66dcf33..d1964fd 100644
--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@@ -47,35 +47,97 @@ class CudaCompiler(Compiler):
return []
def sanity_check(self, work_dir, environment):
- source_name = os.path.join(work_dir, 'sanitycheckcuda.cu')
- binary_name = os.path.join(work_dir, 'sanitycheckcuda')
- extra_flags = self.get_cross_extra_flags(environment, link=False)
- if self.is_cross:
- extra_flags += self.get_compile_only_args()
-
- code = '''
-__global__ void kernel (void) {
-
-}
+ mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
+ mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
- int main(int argc,char** argv){
+ sname = 'sanitycheckcuda.cu'
+ code = r'''
+ #include <cuda_runtime.h>
+ #include <stdio.h>
+
+ __global__ void kernel (void) {}
+
+ int main(void){
+ struct cudaDeviceProp prop;
+ int count, i;
+ cudaError_t ret = cudaGetDeviceCount(&count);
+ if(ret != cudaSuccess){
+ fprintf(stderr, "%d\n", (int)ret);
+ }else{
+ for(i=0;i<count;i++){
+ if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
+ fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
+ }
+ }
+ }
+ fflush(stderr);
+ fflush(stdout);
return 0;
}
'''
-
+ binname = sname.rsplit('.', 1)[0]
+ binname += '_cross' if self.is_cross else ''
+ source_name = os.path.join(work_dir, sname)
+ binary_name = os.path.join(work_dir, binname + '.exe')
with open(source_name, 'w') as ofile:
ofile.write(code)
- pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
- pc.wait()
+
+ # The Sanity Test for CUDA language will serve as both a sanity test
+ # and a native-build GPU architecture detection test, useful later.
+ #
+ # For this second purpose, NVCC has very handy flags, --run and
+ # --run-args, that allow one to run an application with the
+ # environment set up properly. Of course, this only works for native
+ # builds; For cross builds we must still use the exe_wrapper (if any).
+ self.detected_cc = ''
+ flags = ['-w', '-cudart', 'static', source_name]
+ if self.is_cross and self.exe_wrapper is None:
+ # Linking cross built apps is painful. You can't really
+ # tell if you should use -nostdlib or not and for example
+ # on OSX the compiler binary is the same but you need
+ # a ton of compiler flags to differentiate between
+ # arm and x86_64. So just compile.
+ flags += self.get_compile_only_args()
+ flags += self.get_output_args(binary_name)
+
+ # Compile sanity check
+ cmdlist = self.exelist + flags
+ mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
+ pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+ mlog.debug('Sanity check compile stdout: ')
+ mlog.debug(stdo)
+ mlog.debug('-----\nSanity check compile stderr:')
+ mlog.debug(stde)
+ mlog.debug('-----')
if pc.returncode != 0:
- raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string())
+ raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
+
+ # Run sanity check (if possible)
if self.is_cross:
- # Can't check if the binaries run so we have to assume they do
- return
- pe = subprocess.Popen(binary_name)
+ if self.exe_wrapper is None:
+ return
+ else:
+ cmdlist = self.exe_wrapper + [binary_name]
+ else:
+ cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
+ mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
+ pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+ mlog.debug('Sanity check run stdout: ')
+ mlog.debug(stdo)
+ mlog.debug('-----\nSanity check run stderr:')
+ mlog.debug(stde)
+ mlog.debug('-----')
pe.wait()
if pe.returncode != 0:
- raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string())
+ raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
+
+ # Interpret the result of the sanity test.
+ # As mentionned above, it is not only a sanity test but also a GPU
+ # architecture detection test.
+ if stde == '':
+ self.detected_cc = stdo
+ else:
+ mlog.debug('cudaGetDeviceCount() returned ' + stde)
def get_compiler_check_args(self):
return super().get_compiler_check_args() + []
@@ -92,56 +154,6 @@ __global__ void kernel (void) {
int main () {{ return 0; }}'''
return self.compiles(t.format(**fargs), env, extra_args, dependencies)
- def sanity_check_impl(self, work_dir, environment, sname, code):
- mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
- mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
-
- extra_flags = []
- source_name = os.path.join(work_dir, sname)
- binname = sname.rsplit('.', 1)[0]
- if self.is_cross:
- binname += '_cross'
- if self.exe_wrapper is None:
- # Linking cross built apps is painful. You can't really
- # tell if you should use -nostdlib or not and for example
- # on OSX the compiler binary is the same but you need
- # a ton of compiler flags to differentiate between
- # arm and x86_64. So just compile.
- extra_flags += self.get_cross_extra_flags(environment, link=False)
- extra_flags += self.get_compile_only_args()
- else:
- extra_flags += self.get_cross_extra_flags(environment, link=True)
- # Is a valid executable output for all toolchains and platforms
- binname += '.exe'
- # Write binary check source
- binary_name = os.path.join(work_dir, binname)
- with open(source_name, 'w') as ofile:
- ofile.write(code)
- # Compile sanity check
- cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name)
- pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
- mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist))
- mlog.debug('Sanity check compile stdout:')
- mlog.debug(stdo)
- mlog.debug('-----\nSanity check compile stderr:')
- mlog.debug(stde)
- mlog.debug('-----')
- if pc.returncode != 0:
- raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
- # Run sanity check
- if self.is_cross:
- if self.exe_wrapper is None:
- # Can't check if the binaries run so we have to assume they do
- return
- cmdlist = self.exe_wrapper + [binary_name]
- else:
- cmdlist = [binary_name]
- mlog.debug('Running test binary command: ' + ' '.join(cmdlist))
- pe = subprocess.Popen(cmdlist)
- pe.wait()
- if pe.returncode != 0:
- raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
-
@staticmethod
def _cook_link_args(args):
"""
@@ -176,7 +188,7 @@ __global__ void kernel (void) {
return cuda_debug_args[is_debug]
def get_werror_args(self):
- return ['-Werror']
+ return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
def get_linker_exelist(self):
return self.exelist[:]
diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py
index 941b15a..1a74973 100644
--- a/mesonbuild/modules/unstable_cuda.py
+++ b/mesonbuild/modules/unstable_cuda.py
@@ -77,11 +77,19 @@ class CudaModule(ExtensionModule):
@staticmethod
def _break_arch_string(s):
- s = re.sub('[ \t,;]+', ';', s)
+ s = re.sub('[ \t\r\n,;]+', ';', s)
s = s.strip(';').split(';')
return s
@staticmethod
+ def _detected_cc_from_compiler(c):
+ if isinstance(c, CompilerHolder):
+ c = c.compiler
+ if isinstance(c, CudaCompiler):
+ return c.detected_cc
+ return ''
+
+ @staticmethod
def _version_from_compiler(c):
if isinstance(c, CompilerHolder):
c = c.compiler
@@ -97,7 +105,8 @@ class CudaModule(ExtensionModule):
if len(args) < 1:
raise argerror
else:
- cuda_version = self._version_from_compiler(args[0])
+ compiler = args[0]
+ cuda_version = self._version_from_compiler(compiler)
if cuda_version == 'unknown':
raise argerror
@@ -108,7 +117,8 @@ class CudaModule(ExtensionModule):
raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''')
arch_list = arch_list[0] if len(arch_list) == 1 else arch_list
- detected = flatten([kwargs.get('detected', [])])
+ detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler))
+ detected = flatten([detected])
detected = [self._break_arch_string(a) for a in detected]
detected = flatten(detected)
if not set(detected).isdisjoint({'All', 'Common', 'Auto'}):
@@ -148,7 +158,7 @@ class CudaModule(ExtensionModule):
cuda_limit_gpu_architecture = '7.0' # noqa: E221
if version_compare(cuda_version, '>=9.0'):
- cuda_known_gpu_architectures += ['Volta', 'Volta+Tegra'] # noqa: E221
+ cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221
cuda_common_gpu_architectures += ['7.0', '7.0+PTX'] # noqa: E221
cuda_all_gpu_architectures += ['7.0', '7.0+PTX', '7.2', '7.2+PTX'] # noqa: E221
@@ -215,7 +225,7 @@ class CudaModule(ExtensionModule):
'Pascal': (['6.0', '6.1'], ['6.1']),
'Pascal+Tegra': (['6.2'], []),
'Volta': (['7.0'], ['7.0']),
- 'Volta+Tegra': (['7.2'], []),
+ 'Xavier': (['7.2'], []),
'Turing': (['7.5'], ['7.5']),
}.get(arch_name, (None, None))