diff options
-rw-r--r-- | docs/markdown/Cuda-module.md | 10 | ||||
-rw-r--r-- | mesonbuild/compilers/cuda.py | 152 | ||||
-rw-r--r-- | mesonbuild/modules/unstable_cuda.py | 20 |
3 files changed, 106 insertions, 76 deletions
diff --git a/docs/markdown/Cuda-module.md b/docs/markdown/Cuda-module.md index caa1756..f161eac 100644 --- a/docs/markdown/Cuda-module.md +++ b/docs/markdown/Cuda-module.md @@ -71,6 +71,14 @@ mixed with architecture names or compute capabilities. Their interpretation is: | `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. | | `'Auto'` | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. | +As a special case, when `nvcc_arch_flags()` is invoked with + +- an NVCC `compiler` object `nvcc`, +- `'Auto'` mode and +- no `detected:` keyword, + +Meson uses `nvcc`'s architecture auto-detection results. + The supported architecture names and their corresponding compute capabilities are: @@ -85,7 +93,7 @@ are: | `'Pascal'` | 6.0, 6.1 | | `'Pascal+Tegra'` | 6.2 | | `'Volta'` | 7.0 | -| `'Volta+Tegra'` | 7.2 | +| `'Xavier'` | 7.2 | | `'Turing'` | 7.5 | diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py index 66dcf33..d1964fd 100644 --- a/mesonbuild/compilers/cuda.py +++ b/mesonbuild/compilers/cuda.py @@ -47,35 +47,97 @@ class CudaCompiler(Compiler): return [] def sanity_check(self, work_dir, environment): - source_name = os.path.join(work_dir, 'sanitycheckcuda.cu') - binary_name = os.path.join(work_dir, 'sanitycheckcuda') - extra_flags = self.get_cross_extra_flags(environment, link=False) - if self.is_cross: - extra_flags += self.get_compile_only_args() - - code = ''' -__global__ void kernel (void) { - -} + mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) + mlog.debug('Is cross compiler: %s.' 
% str(self.is_cross)) - int main(int argc,char** argv){ + sname = 'sanitycheckcuda.cu' + code = r''' + #include <cuda_runtime.h> + #include <stdio.h> + + __global__ void kernel (void) {} + + int main(void){ + struct cudaDeviceProp prop; + int count, i; + cudaError_t ret = cudaGetDeviceCount(&count); + if(ret != cudaSuccess){ + fprintf(stderr, "%d\n", (int)ret); + }else{ + for(i=0;i<count;i++){ + if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){ + fprintf(stdout, "%d.%d\n", prop.major, prop.minor); + } + } + } + fflush(stderr); + fflush(stdout); return 0; } ''' - + binname = sname.rsplit('.', 1)[0] + binname += '_cross' if self.is_cross else '' + source_name = os.path.join(work_dir, sname) + binary_name = os.path.join(work_dir, binname + '.exe') with open(source_name, 'w') as ofile: ofile.write(code) - pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name]) - pc.wait() + + # The Sanity Test for CUDA language will serve as both a sanity test + # and a native-build GPU architecture detection test, useful later. + # + # For this second purpose, NVCC has very handy flags, --run and + # --run-args, that allow one to run an application with the + # environment set up properly. Of course, this only works for native + # builds; For cross builds we must still use the exe_wrapper (if any). + self.detected_cc = '' + flags = ['-w', '-cudart', 'static', source_name] + if self.is_cross and self.exe_wrapper is None: + # Linking cross built apps is painful. You can't really + # tell if you should use -nostdlib or not and for example + # on OSX the compiler binary is the same but you need + # a ton of compiler flags to differentiate between + # arm and x86_64. So just compile. 
+ flags += self.get_compile_only_args() + flags += self.get_output_args(binary_name) + + # Compile sanity check + cmdlist = self.exelist + flags + mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist)) + pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir) + mlog.debug('Sanity check compile stdout: ') + mlog.debug(stdo) + mlog.debug('-----\nSanity check compile stderr:') + mlog.debug(stde) + mlog.debug('-----') if pc.returncode != 0: - raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string()) + raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string())) + + # Run sanity check (if possible) if self.is_cross: - # Can't check if the binaries run so we have to assume they do - return - pe = subprocess.Popen(binary_name) + if self.exe_wrapper is None: + return + else: + cmdlist = self.exe_wrapper + [binary_name] + else: + cmdlist = self.exelist + ['--run', '"' + binary_name + '"'] + mlog.debug('Sanity check run command line: ', ' '.join(cmdlist)) + pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir) + mlog.debug('Sanity check run stdout: ') + mlog.debug(stdo) + mlog.debug('-----\nSanity check run stderr:') + mlog.debug(stde) + mlog.debug('-----') pe.wait() if pe.returncode != 0: - raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string()) + raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string())) + + # Interpret the result of the sanity test. + # As mentioned above, it is not only a sanity test but also a GPU + # architecture detection test. 
+ if stde == '': + self.detected_cc = stdo + else: + mlog.debug('cudaGetDeviceCount() returned ' + stde) def get_compiler_check_args(self): return super().get_compiler_check_args() + [] @@ -92,56 +154,6 @@ __global__ void kernel (void) { int main () {{ return 0; }}''' return self.compiles(t.format(**fargs), env, extra_args, dependencies) - def sanity_check_impl(self, work_dir, environment, sname, code): - mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist)) - mlog.debug('Is cross compiler: %s.' % str(self.is_cross)) - - extra_flags = [] - source_name = os.path.join(work_dir, sname) - binname = sname.rsplit('.', 1)[0] - if self.is_cross: - binname += '_cross' - if self.exe_wrapper is None: - # Linking cross built apps is painful. You can't really - # tell if you should use -nostdlib or not and for example - # on OSX the compiler binary is the same but you need - # a ton of compiler flags to differentiate between - # arm and x86_64. So just compile. 
- extra_flags += self.get_cross_extra_flags(environment, link=False) - extra_flags += self.get_compile_only_args() - else: - extra_flags += self.get_cross_extra_flags(environment, link=True) - # Is a valid executable output for all toolchains and platforms - binname += '.exe' - # Write binary check source - binary_name = os.path.join(work_dir, binname) - with open(source_name, 'w') as ofile: - ofile.write(code) - # Compile sanity check - cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name) - pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir) - mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist)) - mlog.debug('Sanity check compile stdout:') - mlog.debug(stdo) - mlog.debug('-----\nSanity check compile stderr:') - mlog.debug(stde) - mlog.debug('-----') - if pc.returncode != 0: - raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string())) - # Run sanity check - if self.is_cross: - if self.exe_wrapper is None: - # Can't check if the binaries run so we have to assume they do - return - cmdlist = self.exe_wrapper + [binary_name] - else: - cmdlist = [binary_name] - mlog.debug('Running test binary command: ' + ' '.join(cmdlist)) - pe = subprocess.Popen(cmdlist) - pe.wait() - if pe.returncode != 0: - raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string())) - @staticmethod def _cook_link_args(args): """ @@ -176,7 +188,7 @@ __global__ void kernel (void) { return cuda_debug_args[is_debug] def get_werror_args(self): - return ['-Werror'] + return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder'] def get_linker_exelist(self): return self.exelist[:] diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py index 941b15a..1a74973 100644 --- a/mesonbuild/modules/unstable_cuda.py +++ b/mesonbuild/modules/unstable_cuda.py @@ -77,11 +77,19 @@ class CudaModule(ExtensionModule): 
@staticmethod def _break_arch_string(s): - s = re.sub('[ \t,;]+', ';', s) + s = re.sub('[ \t\r\n,;]+', ';', s) s = s.strip(';').split(';') return s @staticmethod + def _detected_cc_from_compiler(c): + if isinstance(c, CompilerHolder): + c = c.compiler + if isinstance(c, CudaCompiler): + return c.detected_cc + return '' + + @staticmethod def _version_from_compiler(c): if isinstance(c, CompilerHolder): c = c.compiler @@ -97,7 +105,8 @@ class CudaModule(ExtensionModule): if len(args) < 1: raise argerror else: - cuda_version = self._version_from_compiler(args[0]) + compiler = args[0] + cuda_version = self._version_from_compiler(compiler) if cuda_version == 'unknown': raise argerror @@ -108,7 +117,8 @@ class CudaModule(ExtensionModule): raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''') arch_list = arch_list[0] if len(arch_list) == 1 else arch_list - detected = flatten([kwargs.get('detected', [])]) + detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler)) + detected = flatten([detected]) detected = [self._break_arch_string(a) for a in detected] detected = flatten(detected) if not set(detected).isdisjoint({'All', 'Common', 'Auto'}): @@ -148,7 +158,7 @@ class CudaModule(ExtensionModule): cuda_limit_gpu_architecture = '7.0' # noqa: E221 if version_compare(cuda_version, '>=9.0'): - cuda_known_gpu_architectures += ['Volta', 'Volta+Tegra'] # noqa: E221 + cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221 cuda_common_gpu_architectures += ['7.0', '7.0+PTX'] # noqa: E221 cuda_all_gpu_architectures += ['7.0', '7.0+PTX', '7.2', '7.2+PTX'] # noqa: E221 @@ -215,7 +225,7 @@ class CudaModule(ExtensionModule): 'Pascal': (['6.0', '6.1'], ['6.1']), 'Pascal+Tegra': (['6.2'], []), 'Volta': (['7.0'], ['7.0']), - 'Volta+Tegra': (['7.2'], []), + 'Xavier': (['7.2'], []), 'Turing': (['7.5'], ['7.5']), }.get(arch_name, (None, None)) |