about summary refs log tree commit diff
diff options
context:
space:
mode:
author Jussi Pakkanen <jpakkane@gmail.com> 2019-02-24 23:48:28 +0200
committer GitHub <noreply@github.com> 2019-02-24 23:48:28 +0200
commit 41fb5c2960b678ec7722d5f9b3c555757ba8a6bd (patch)
tree 7643436b63bdf4773a3e0b3a9d012802ec01aaac
parent 5b53335724c11e4443ac62bfbaea90881323d2b1 (diff)
parent 104397a4293f78d3cbdd84f380cefb84ca54ec99 (diff)
download meson-41fb5c2960b678ec7722d5f9b3c555757ba8a6bd.zip
meson-41fb5c2960b678ec7722d5f9b3c555757ba8a6bd.tar.gz
meson-41fb5c2960b678ec7722d5f9b3c555757ba8a6bd.tar.bz2
Merge pull request #4972 from obilaniu/cudafixes
CUDA fixes
-rw-r--r-- docs/markdown/Cuda-module.md 10
-rw-r--r-- mesonbuild/compilers/cuda.py 152
-rw-r--r-- mesonbuild/modules/unstable_cuda.py 20
3 files changed, 106 insertions, 76 deletions
diff --git a/docs/markdown/Cuda-module.md b/docs/markdown/Cuda-module.md
index caa1756..f161eac 100644
--- a/docs/markdown/Cuda-module.md
+++ b/docs/markdown/Cuda-module.md
@@ -71,6 +71,14 @@ mixed with architecture names or compute capabilities. Their interpretation is:
| `'Common'` | Relatively common CCs supported by given NVCC compiler. Generally excludes Tegra and Tesla devices. |
| `'Auto'` | The CCs provided by the `detected:` keyword, filtered for support by given NVCC compiler. |
+As a special case, when `nvcc_arch_flags()` is invoked with
+
+- an NVCC `compiler` object `nvcc`,
+- `'Auto'` mode and
+- no `detected:` keyword,
+
+Meson uses `nvcc`'s architecture auto-detection results.
+
The supported architecture names and their corresponding compute capabilities
are:
@@ -85,7 +93,7 @@ are:
| `'Pascal'` | 6.0, 6.1 |
| `'Pascal+Tegra'` | 6.2 |
| `'Volta'` | 7.0 |
-| `'Volta+Tegra'` | 7.2 |
+| `'Xavier'` | 7.2 |
| `'Turing'` | 7.5 |
diff --git a/mesonbuild/compilers/cuda.py b/mesonbuild/compilers/cuda.py
index 66dcf33..d1964fd 100644
--- a/mesonbuild/compilers/cuda.py
+++ b/mesonbuild/compilers/cuda.py
@@ -47,35 +47,97 @@ class CudaCompiler(Compiler):
return []
def sanity_check(self, work_dir, environment):
- source_name = os.path.join(work_dir, 'sanitycheckcuda.cu')
- binary_name = os.path.join(work_dir, 'sanitycheckcuda')
- extra_flags = self.get_cross_extra_flags(environment, link=False)
- if self.is_cross:
- extra_flags += self.get_compile_only_args()
-
- code = '''
-__global__ void kernel (void) {
-
-}
+ mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
+ mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
- int main(int argc,char** argv){
+ sname = 'sanitycheckcuda.cu'
+ code = r'''
+ #include <cuda_runtime.h>
+ #include <stdio.h>
+
+ __global__ void kernel (void) {}
+
+ int main(void){
+ struct cudaDeviceProp prop;
+ int count, i;
+ cudaError_t ret = cudaGetDeviceCount(&count);
+ if(ret != cudaSuccess){
+ fprintf(stderr, "%d\n", (int)ret);
+ }else{
+ for(i=0;i<count;i++){
+ if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
+ fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
+ }
+ }
+ }
+ fflush(stderr);
+ fflush(stdout);
return 0;
}
'''
-
+ binname = sname.rsplit('.', 1)[0]
+ binname += '_cross' if self.is_cross else ''
+ source_name = os.path.join(work_dir, sname)
+ binary_name = os.path.join(work_dir, binname + '.exe')
with open(source_name, 'w') as ofile:
ofile.write(code)
- pc = subprocess.Popen(self.exelist + extra_flags + [source_name, '-o', binary_name])
- pc.wait()
+
+ # The Sanity Test for CUDA language will serve as both a sanity test
+ # and a native-build GPU architecture detection test, useful later.
+ #
+ # For this second purpose, NVCC has very handy flags, --run and
+ # --run-args, that allow one to run an application with the
+ # environment set up properly. Of course, this only works for native
+ # builds; For cross builds we must still use the exe_wrapper (if any).
+ self.detected_cc = ''
+ flags = ['-w', '-cudart', 'static', source_name]
+ if self.is_cross and self.exe_wrapper is None:
+ # Linking cross built apps is painful. You can't really
+ # tell if you should use -nostdlib or not and for example
+ # on OSX the compiler binary is the same but you need
+ # a ton of compiler flags to differentiate between
+ # arm and x86_64. So just compile.
+ flags += self.get_compile_only_args()
+ flags += self.get_output_args(binary_name)
+
+ # Compile sanity check
+ cmdlist = self.exelist + flags
+ mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
+ pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+ mlog.debug('Sanity check compile stdout: ')
+ mlog.debug(stdo)
+ mlog.debug('-----\nSanity check compile stderr:')
+ mlog.debug(stde)
+ mlog.debug('-----')
if pc.returncode != 0:
- raise EnvironmentException('Cuda compiler %s can not compile programs.' % self.name_string())
+ raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
+
+ # Run sanity check (if possible)
if self.is_cross:
- # Can't check if the binaries run so we have to assume they do
- return
- pe = subprocess.Popen(binary_name)
+ if self.exe_wrapper is None:
+ return
+ else:
+ cmdlist = self.exe_wrapper + [binary_name]
+ else:
+ cmdlist = self.exelist + ['--run', '"' + binary_name + '"']
+ mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
+ pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+ mlog.debug('Sanity check run stdout: ')
+ mlog.debug(stdo)
+ mlog.debug('-----\nSanity check run stderr:')
+ mlog.debug(stde)
+ mlog.debug('-----')
pe.wait()
if pe.returncode != 0:
- raise EnvironmentException('Executables created by Cuda compiler %s are not runnable.' % self.name_string())
+ raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
+
+ # Interpret the result of the sanity test.
+ # As mentioned above, it is not only a sanity test but also a GPU
+ # architecture detection test.
+ if stde == '':
+ self.detected_cc = stdo
+ else:
+ mlog.debug('cudaGetDeviceCount() returned ' + stde)
def get_compiler_check_args(self):
return super().get_compiler_check_args() + []
@@ -92,56 +154,6 @@ __global__ void kernel (void) {
int main () {{ return 0; }}'''
return self.compiles(t.format(**fargs), env, extra_args, dependencies)
- def sanity_check_impl(self, work_dir, environment, sname, code):
- mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
- mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
-
- extra_flags = []
- source_name = os.path.join(work_dir, sname)
- binname = sname.rsplit('.', 1)[0]
- if self.is_cross:
- binname += '_cross'
- if self.exe_wrapper is None:
- # Linking cross built apps is painful. You can't really
- # tell if you should use -nostdlib or not and for example
- # on OSX the compiler binary is the same but you need
- # a ton of compiler flags to differentiate between
- # arm and x86_64. So just compile.
- extra_flags += self.get_cross_extra_flags(environment, link=False)
- extra_flags += self.get_compile_only_args()
- else:
- extra_flags += self.get_cross_extra_flags(environment, link=True)
- # Is a valid executable output for all toolchains and platforms
- binname += '.exe'
- # Write binary check source
- binary_name = os.path.join(work_dir, binname)
- with open(source_name, 'w') as ofile:
- ofile.write(code)
- # Compile sanity check
- cmdlist = self.exelist + extra_flags + [source_name] + self.get_output_args(binary_name)
- pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
- mlog.debug('Sanity check compiler command line:', ' '.join(cmdlist))
- mlog.debug('Sanity check compile stdout:')
- mlog.debug(stdo)
- mlog.debug('-----\nSanity check compile stderr:')
- mlog.debug(stde)
- mlog.debug('-----')
- if pc.returncode != 0:
- raise EnvironmentException('Compiler {0} can not compile programs.'.format(self.name_string()))
- # Run sanity check
- if self.is_cross:
- if self.exe_wrapper is None:
- # Can't check if the binaries run so we have to assume they do
- return
- cmdlist = self.exe_wrapper + [binary_name]
- else:
- cmdlist = [binary_name]
- mlog.debug('Running test binary command: ' + ' '.join(cmdlist))
- pe = subprocess.Popen(cmdlist)
- pe.wait()
- if pe.returncode != 0:
- raise EnvironmentException('Executables created by {0} compiler {1} are not runnable.'.format(self.language, self.name_string()))
-
@staticmethod
def _cook_link_args(args):
"""
@@ -176,7 +188,7 @@ __global__ void kernel (void) {
return cuda_debug_args[is_debug]
def get_werror_args(self):
- return ['-Werror']
+ return ['-Werror=cross-execution-space-call,deprecated-declarations,reorder']
def get_linker_exelist(self):
return self.exelist[:]
diff --git a/mesonbuild/modules/unstable_cuda.py b/mesonbuild/modules/unstable_cuda.py
index 941b15a..1a74973 100644
--- a/mesonbuild/modules/unstable_cuda.py
+++ b/mesonbuild/modules/unstable_cuda.py
@@ -77,11 +77,19 @@ class CudaModule(ExtensionModule):
@staticmethod
def _break_arch_string(s):
- s = re.sub('[ \t,;]+', ';', s)
+ s = re.sub('[ \t\r\n,;]+', ';', s)
s = s.strip(';').split(';')
return s
@staticmethod
+ def _detected_cc_from_compiler(c):
+ if isinstance(c, CompilerHolder):
+ c = c.compiler
+ if isinstance(c, CudaCompiler):
+ return c.detected_cc
+ return ''
+
+ @staticmethod
def _version_from_compiler(c):
if isinstance(c, CompilerHolder):
c = c.compiler
@@ -97,7 +105,8 @@ class CudaModule(ExtensionModule):
if len(args) < 1:
raise argerror
else:
- cuda_version = self._version_from_compiler(args[0])
+ compiler = args[0]
+ cuda_version = self._version_from_compiler(compiler)
if cuda_version == 'unknown':
raise argerror
@@ -108,7 +117,8 @@ class CudaModule(ExtensionModule):
raise InvalidArguments('''The special architectures 'All', 'Common' and 'Auto' must appear alone, as a positional argument!''')
arch_list = arch_list[0] if len(arch_list) == 1 else arch_list
- detected = flatten([kwargs.get('detected', [])])
+ detected = kwargs.get('detected', self._detected_cc_from_compiler(compiler))
+ detected = flatten([detected])
detected = [self._break_arch_string(a) for a in detected]
detected = flatten(detected)
if not set(detected).isdisjoint({'All', 'Common', 'Auto'}):
@@ -148,7 +158,7 @@ class CudaModule(ExtensionModule):
cuda_limit_gpu_architecture = '7.0' # noqa: E221
if version_compare(cuda_version, '>=9.0'):
- cuda_known_gpu_architectures += ['Volta', 'Volta+Tegra'] # noqa: E221
+ cuda_known_gpu_architectures += ['Volta', 'Xavier'] # noqa: E221
cuda_common_gpu_architectures += ['7.0', '7.0+PTX'] # noqa: E221
cuda_all_gpu_architectures += ['7.0', '7.0+PTX', '7.2', '7.2+PTX'] # noqa: E221
@@ -215,7 +225,7 @@ class CudaModule(ExtensionModule):
'Pascal': (['6.0', '6.1'], ['6.1']),
'Pascal+Tegra': (['6.2'], []),
'Volta': (['7.0'], ['7.0']),
- 'Volta+Tegra': (['7.2'], []),
+ 'Xavier': (['7.2'], []),
'Turing': (['7.5'], ['7.5']),
}.get(arch_name, (None, None))