1 files changed, 44 insertions, 30 deletions
diff --git a/mesonbuild/dependencies/cuda.py b/mesonbuild/dependencies/cuda.py
index 82bf5ad..cd97127 100644
--- a/mesonbuild/dependencies/cuda.py
+++ b/mesonbuild/dependencies/cuda.py
@@ -11,9 +11,9 @@ from pathlib import Path
 
 from .. import mesonlib
 from .. import mlog
-from ..environment import detect_cpu_family
 from .base import DependencyException, SystemDependency
 from .detect import packages
+from ..mesonlib import LibType
 
 
 if T.TYPE_CHECKING:
@@ -27,8 +27,11 @@ class CudaDependency(SystemDependency):
     supported_languages = ['cpp', 'c', 'cuda'] # see also _default_language
 
     def __init__(self, environment: 'Environment', kwargs: T.Dict[str, T.Any]) -> None:
-        compilers = environment.coredata.compilers[self.get_for_machine_from_kwargs(kwargs)]
+        for_machine = self.get_for_machine_from_kwargs(kwargs)
+        compilers = environment.coredata.compilers[for_machine]
+        machine = environment.machines[for_machine]
         language = self._detect_language(compilers)
+
         if language not in self.supported_languages:
             raise DependencyException(f'Language \'{language}\' is not supported by the CUDA Toolkit. Supported languages are {self.supported_languages}.')
 
@@ -50,16 +53,26 @@ class CudaDependency(SystemDependency):
         if not os.path.isabs(self.cuda_path):
             raise DependencyException(f'CUDA Toolkit path must be absolute, got \'{self.cuda_path}\'.')
 
+        # CUDA target directory, relative to the CUDA Toolkit path (self.cuda_path).
+        if machine.is_linux():
+            # E.g. targets/x86_64-linux
+            self.target_path = os.path.join('targets', f'{machine.cpu_family}-{machine.system}')
+        else:
+            self.target_path = '.'
+
         # nvcc already knows where to find the CUDA Toolkit, but if we're compiling
         # a mixed C/C++/CUDA project, we still need to make the include dir searchable
         if self.language != 'cuda' or len(compilers) > 1:
-            self.incdir = os.path.join(self.cuda_path, 'include')
+            self.incdir = os.path.join(self.cuda_path, self.target_path, 'include')
             self.compile_args += [f'-I{self.incdir}']
 
         arch_libdir = self._detect_arch_libdir()
-        self.libdir = os.path.join(self.cuda_path, arch_libdir)
+        self.libdir = os.path.join(self.cuda_path, self.target_path, arch_libdir)
         mlog.debug('CUDA library directory is', mlog.bold(self.libdir))
 
+        if 'static' not in kwargs:
+            self.libtype = LibType.PREFER_STATIC
+
         self.is_found = self._find_requested_libraries()
 
     @classmethod
@@ -211,8 +224,8 @@ class CudaDependency(SystemDependency):
         return '.'.join(version.split('.')[:2])
 
     def _detect_arch_libdir(self) -> str:
-        arch = detect_cpu_family(self.env.coredata.compilers.host)
         machine = self.env.machines[self.for_machine]
+        arch = machine.cpu_family
         msg = '{} architecture is not supported in {} version of the CUDA Toolkit.'
         if machine.is_windows():
             libdirs = {'x86': 'Win32', 'x86_64': 'x64'}
@@ -220,10 +233,7 @@ class CudaDependency(SystemDependency):
                 raise DependencyException(msg.format(arch, 'Windows'))
             return os.path.join('lib', libdirs[arch])
         elif machine.is_linux():
-            libdirs = {'x86_64': 'lib64', 'ppc64': 'lib', 'aarch64': 'lib64', 'loongarch64': 'lib64'}
-            if arch not in libdirs:
-                raise DependencyException(msg.format(arch, 'Linux'))
-            return libdirs[arch]
+            return 'lib'
         elif machine.is_darwin():
             libdirs = {'x86_64': 'lib64'}
             if arch not in libdirs:
@@ -236,13 +246,14 @@ class CudaDependency(SystemDependency):
         all_found = True
 
         for module in self.requested_modules:
-            args = self.clib_compiler.find_library(module, self.env, [self.libdir])
-            if module == 'cudart_static' and self.language != 'cuda':
-                machine = self.env.machines[self.for_machine]
-                if machine.is_linux():
-                    # extracted by running
-                    #   nvcc -v foo.o
-                    args += ['-lrt', '-lpthread', '-ldl']
+            # Only ever link to libraries inside the CUDA tree, never to ones outside of it.
+            # For instance, there are two copies of libnvidia-ml.so:
+            #
+            # - one in stubs/ of the CUDA tree
+            # - one in /usr/lib/ that is provided by the NVIDIA drivers
+            #
+            # Users should never link to the latter, since its ABI may change.
+            args = self.clib_compiler.find_library(module, self.env, [self.libdir, os.path.join(self.libdir, 'stubs')], self.libtype, ignore_system_dirs=True)
 
             if args is None:
                 self._report_dependency_error(f'Couldn\'t find requested CUDA module \'{module}\'')
@@ -284,23 +295,26 @@ class CudaDependency(SystemDependency):
         return candidates
 
     def get_link_args(self, language: T.Optional[str] = None, raw: bool = False) -> T.List[str]:
+        # When nvcc performs the link, use its native driver options for these modules instead.
+        REWRITE_MODULES = {
+            'cudart': ['-cudart', 'shared'],
+            'cudart_static': ['-cudart', 'static'],
+            'cudadevrt': ['-cudadevrt'],
+        }
+
         args: T.List[str] = []
         for lib in self.requested_modules:
             link_args = self.lib_modules[lib]
-            # Turn canonical arguments like
-            #   /opt/cuda/lib64/libcublas.so
-            # back into
-            #   -lcublas
-            # since this is how CUDA modules were passed to nvcc since time immemorial
-            if language == 'cuda':
-                if lib in frozenset(['cudart', 'cudart_static']):
-                    # nvcc always links these unconditionally
-                    mlog.debug(f'Not adding \'{lib}\' to dependency, since nvcc will link it implicitly')
-                    link_args = []
-                elif link_args and link_args[0].startswith(self.libdir):
-                    # module included with CUDA, nvcc knows how to find these itself
-                    mlog.debug(f'CUDA module \'{lib}\' found in CUDA libdir')
-                    link_args = ['-l' + lib]
+            if language == 'cuda' and lib in REWRITE_MODULES:
+                link_args = REWRITE_MODULES[lib]
+                mlog.debug(f'Rewriting module \'{lib}\' to \'{link_args}\'')
+            elif lib == 'cudart_static':
+                machine = self.env.machines[self.for_machine]
+                if machine.is_linux():
+                    # Extra libraries extracted by running "nvcc -v foo.o". Build a new
+                    # list here: "+=" would extend the list stored in self.lib_modules.
+                    link_args = link_args + ['-lrt', '-lpthread', '-ldl']
+
             args += link_args
 
         return args