aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDylan Baker <dylan@pnwbakers.com>2024-03-11 12:35:25 -0700
committerDylan Baker <dylan@pnwbakers.com>2025-04-03 12:27:07 -0700
commitea344be9b017042fa206cb12e9fee95c1c22fae5 (patch)
treec2e064ea5ffd4ff7e6dae5d8a333af1172b6db94
parentcc815c4bcac055721ae359cbc757f50c10ed54ed (diff)
downloadmeson-ea344be9b017042fa206cb12e9fee95c1c22fae5.zip
meson-ea344be9b017042fa206cb12e9fee95c1c22fae5.tar.gz
meson-ea344be9b017042fa206cb12e9fee95c1c22fae5.tar.bz2
backend/ninja: use a two step process for dependency scanning
This splits the scanner into two discrete steps, one that scans the source files, and one that reads in the dependency information and produces a dyndep. The scanner uses the JSON format from https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1689r5.html, which is the same format that MSVC and Clang use for C++ modules scanning. This will allow us to more easily move to using MSVC and clang-scan-deps when possible. As an added bonus, this correctly tracks dependencies across TU and Target boundaries, unlike the previous implementation, which assumed that if it couldn't find a provider that everything was good, but could run into issues. Because of that limitation Fortran code had to fully depend on all of its dependencies, transitive or not. Now, when using the dep scanner, we can remove that restriction, allowing more parallelism.
-rw-r--r--mesonbuild/backend/ninjabackend.py59
-rw-r--r--mesonbuild/scripts/depaccumulate.py129
-rw-r--r--mesonbuild/scripts/depscan.py133
3 files changed, 254 insertions, 67 deletions
diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index a75befd..00cf4ad 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -1030,7 +1030,12 @@ class NinjaBackend(backends.Backend):
obj_targets = [t for t in od if t.uses_fortran()]
obj_list.extend(o)
- fortran_order_deps = [File(True, *os.path.split(self.get_target_filename(t))) for t in obj_targets]
+ # We don't need this order dep if we're using dyndeps, as the
+ # depscanner will handle this for us, which produces a better dependency
+ # graph
+ fortran_order_deps: T.List[File] = []
+ if not self.use_dyndeps_for_fortran():
+ fortran_order_deps = [File(True, *os.path.split(self.get_target_filename(t))) for t in obj_targets]
fortran_inc_args: T.List[str] = []
if target.uses_fortran():
fortran_inc_args = mesonlib.listify([target.compilers['fortran'].get_include_args(
@@ -1144,7 +1149,7 @@ class NinjaBackend(backends.Backend):
if not self.should_use_dyndeps_for_target(target):
return
self._uses_dyndeps = True
- depscan_file = self.get_dep_scan_file_for(target)
+ json_file, depscan_file = self.get_dep_scan_file_for(target)
pickle_base = target.name + '.dat'
pickle_file = os.path.join(self.get_target_private_dir(target), pickle_base).replace('\\', '/')
pickle_abs = os.path.join(self.get_target_private_dir_abs(target), pickle_base).replace('\\', '/')
@@ -1164,20 +1169,25 @@ class NinjaBackend(backends.Backend):
with open(pickle_abs, 'wb') as p:
pickle.dump(scaninfo, p)
- elem = NinjaBuildElement(self.all_outputs, depscan_file, rule_name, pickle_file)
+ elem = NinjaBuildElement(self.all_outputs, json_file, rule_name, pickle_file)
# A full dependency is required on all scanned sources, if any of them
# are updated we need to rescan, as they may have changed the modules
# they use or export.
for s in scan_sources:
elem.deps.add(s[0])
- # We need a full dependency on the output depfiles of other targets. If
- # they change we need to completely
+ elem.orderdeps.update(object_deps)
+ elem.add_item('name', target.name)
+ self.add_build(elem)
+
+ infiles: T.Set[str] = set()
for t in target.get_all_linked_targets():
if self.should_use_dyndeps_for_target(t):
- elem.deps.add(os.path.join(self.get_target_dir(t), t.get_filename()))
- elem.deps.update({os.path.join(self.get_target_dir(t), t.get_filename())
- for t in self.flatten_object_list(target)[1]})
- elem.orderdeps.update(object_deps)
+ infiles.add(self.get_dep_scan_file_for(t)[0])
+ _, od = self.flatten_object_list(target)
+ infiles.update({self.get_dep_scan_file_for(t)[0] for t in od if t.uses_fortran()})
+
+ elem = NinjaBuildElement(self.all_outputs, depscan_file, 'depaccumulate', [json_file] + sorted(infiles))
+ elem.add_item('name', target.name)
self.add_build(elem)
def select_sources_to_scan(self, compiled_sources: T.List[str],
@@ -2638,10 +2648,19 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
if rulename in self.ruledict:
# Scanning command is the same for native and cross compilation.
return
+
command = self.environment.get_build_command() + \
['--internal', 'depscan']
args = ['$picklefile', '$out', '$in']
- description = 'Scanning modules'
+ description = 'Scanning target $name for modules'
+ rule = NinjaRule(rulename, command, args, description)
+ self.add_rule(rule)
+
+ rulename = 'depaccumulate'
+ command = self.environment.get_build_command() + \
+ ['--internal', 'depaccumulate']
+ args = ['$out', '$in']
+ description = 'Generating dynamic dependency information for target $name'
rule = NinjaRule(rulename, command, args, description)
self.add_rule(rule)
@@ -3160,8 +3179,9 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
d = os.path.join(self.get_target_private_dir(target), d)
element.add_orderdep(d)
element.add_dep(pch_dep)
- for i in self.get_fortran_module_deps(target, compiler):
- element.add_dep(i)
+ if not self.use_dyndeps_for_fortran():
+ for i in self.get_fortran_module_deps(target, compiler):
+ element.add_dep(i)
if dep_file:
element.add_item('DEPFILE', dep_file)
if compiler.get_language() == 'cuda':
@@ -3204,12 +3224,13 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
extension = extension.lower()
if not (extension in compilers.lang_suffixes['fortran'] or extension in compilers.lang_suffixes['cpp']):
return
- dep_scan_file = self.get_dep_scan_file_for(target)
+ dep_scan_file = self.get_dep_scan_file_for(target)[1]
element.add_item('dyndep', dep_scan_file)
element.add_orderdep(dep_scan_file)
- def get_dep_scan_file_for(self, target: build.BuildTarget) -> str:
- return os.path.join(self.get_target_private_dir(target), 'depscan.dd')
+ def get_dep_scan_file_for(self, target: build.BuildTarget) -> T.Tuple[str, str]:
+ priv = self.get_target_private_dir(target)
+ return os.path.join(priv, 'depscan.json'), os.path.join(priv, 'depscan.dd')
def add_header_deps(self, target, ninja_element, header_deps):
for d in header_deps:
@@ -3232,9 +3253,11 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47485'''))
# The real deps are then detected via dep file generation from the compiler. This breaks on compilers that
# produce incorrect dep files but such is life. A full dependency is
# required to ensure that if a new module is added to an existing file that
- # we correctly rebuild.
- def get_fortran_module_deps(self, target, compiler) -> T.List[str]:
- if compiler.language != 'fortran':
+ # we correctly rebuild
+ def get_fortran_module_deps(self, target: build.BuildTarget, compiler: Compiler) -> T.List[str]:
+ # If we have dyndeps then we don't need this, since the depscanner will
+ # do all of the things described above.
+ if compiler.language != 'fortran' or self.use_dyndeps_for_fortran():
return []
return [
os.path.join(self.get_target_dir(lt), lt.get_filename())
diff --git a/mesonbuild/scripts/depaccumulate.py b/mesonbuild/scripts/depaccumulate.py
new file mode 100644
index 0000000..7576390
--- /dev/null
+++ b/mesonbuild/scripts/depaccumulate.py
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright © 2021-2024 Intel Corporation
+
+"""Accumulator for p1689r5 module dependencies.
+
+See: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p1689r5.html
+"""
+
+from __future__ import annotations
+import json
+import re
+import textwrap
+import typing as T
+
+if T.TYPE_CHECKING:
+ from .depscan import Description, Rule
+
+# The quoting logic has been copied from the ninjabackend to avoid having to
+# import half of Meson just to quote outputs, which is a performance problem
+_QUOTE_PAT = re.compile(r'[$ :\n]')
+
+
+def quote(text: str) -> str:
+ # Fast path for when no quoting is necessary
+ if not _QUOTE_PAT.search(text):
+ return text
+ if '\n' in text:
+ errmsg = textwrap.dedent(f'''\
+ Ninja does not support newlines in rules. The content was:
+
+ {text}
+
+ Please report this error with a test case to the Meson bug tracker.''')
+ raise RuntimeError(errmsg)
+ return _QUOTE_PAT.sub(r'$\g<0>', text)
+
+
+_PROVIDER_CACHE: T.Dict[str, str] = {}
+
+
+def get_provider(rules: T.List[Rule], name: str) -> T.Optional[str]:
+ """Get the object that a module from another Target provides
+
+ We must rely on the object file here instead of the module itself, because
+ the object rule is part of the generated build.ninja, while the module is
+ only declared inside a dyndep. This creates for the dyndep generator to
+ depend on previous dyndeps as order deps. Since the module
+ interface file will be generated when the object is generated we can rely on
+ that as a proxy and simplify generation.
+
+ :param rules: The list of rules to check
+ :param name: The logical-name to look for
+ :raises RuntimeError: If no provider can be found
+ :return: The object file of the rule providing the module
+ """
+ # Cache the result for performance reasons
+ if name in _PROVIDER_CACHE:
+ return _PROVIDER_CACHE[name]
+
+ for r in rules:
+ for p in r.get('provides', []):
+ if p['logical-name'] == name:
+ obj = r['primary-output']
+ _PROVIDER_CACHE[name] = obj
+ return obj
+ return None
+
+
+def process_rules(rules: T.List[Rule],
+ extra_rules: T.List[Rule],
+ ) -> T.Iterable[T.Tuple[str, T.Optional[T.List[str]], T.List[str]]]:
+ """Process the rules for this Target
+
+ :param rules: the rules for this target
+ :param extra_rules: the rules for all of the targets this one links with, to use their provides
+ :yield: A tuple of the output, the exported modules, and the consumed modules
+ """
+ for rule in rules:
+ prov: T.Optional[T.List[str]] = None
+ req: T.List[str] = []
+ if 'provides' in rule:
+ prov = [p['compiled-module-path'] for p in rule['provides']]
+ if 'requires' in rule:
+ for p in rule['requires']:
+ modfile = p.get('compiled-module-path')
+ if modfile is not None:
+ req.append(modfile)
+ else:
+ # We can't error if this is not found because of compiler
+ # provided modules
+ found = get_provider(extra_rules, p['logical-name'])
+ if found:
+ req.append(found)
+ yield rule['primary-output'], prov, req
+
+
+def formatter(files: T.Optional[T.List[str]]) -> str:
+ if files:
+ fmt = ' '.join(quote(f) for f in files)
+ return f'| {fmt}'
+ return ''
+
+
+def gen(outfile: str, desc: Description, extra_rules: T.List[Rule]) -> int:
+ with open(outfile, 'w', encoding='utf-8') as f:
+ f.write('ninja_dyndep_version = 1\n\n')
+
+ for obj, provides, requires in process_rules(desc['rules'], extra_rules):
+ ins = formatter(requires)
+ out = formatter(provides)
+ f.write(f'build {quote(obj)} {out}: dyndep {ins}\n\n')
+
+ return 0
+
+
+def run(args: T.List[str]) -> int:
+ assert len(args) >= 2, 'got wrong number of arguments!'
+ outfile, jsonfile, *jsondeps = args
+ with open(jsonfile, 'r', encoding='utf-8') as f:
+ desc: Description = json.load(f)
+
+ # All rules, necessary for fulfilling across TU and target boundaries
+ rules = desc['rules'].copy()
+ for dep in jsondeps:
+ with open(dep, encoding='utf-8') as f:
+ d: Description = json.load(f)
+ rules.extend(d['rules'])
+
+ return gen(outfile, desc, rules)
diff --git a/mesonbuild/scripts/depscan.py b/mesonbuild/scripts/depscan.py
index 44e8054..6bd5cde 100644
--- a/mesonbuild/scripts/depscan.py
+++ b/mesonbuild/scripts/depscan.py
@@ -1,22 +1,60 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2020 The Meson development team
-# Copyright © 2023 Intel Corporation
+# Copyright © 2023-2024 Intel Corporation
from __future__ import annotations
import collections
+import json
import os
import pathlib
import pickle
import re
import typing as T
-from ..backend.ninjabackend import ninja_quote
-
if T.TYPE_CHECKING:
- from typing_extensions import Literal
+ from typing_extensions import Literal, TypedDict, NotRequired
from ..backend.ninjabackend import TargetDependencyScannerInfo
+ Require = TypedDict(
+ 'Require',
+ {
+ 'logical-name': str,
+ 'compiled-module-path': NotRequired[str],
+ 'source-path': NotRequired[str],
+ 'unique-on-source-path': NotRequired[bool],
+ 'lookup-method': NotRequired[Literal['by-name', 'include-angle', 'include-quote']]
+ },
+ )
+
+ Provide = TypedDict(
+ 'Provide',
+ {
+ 'logical-name': str,
+ 'compiled-module-path': NotRequired[str],
+ 'source-path': NotRequired[str],
+ 'unique-on-source-path': NotRequired[bool],
+ 'is-interface': NotRequired[bool],
+ },
+ )
+
+ Rule = TypedDict(
+ 'Rule',
+ {
+ 'primary-output': NotRequired[str],
+ 'outputs': NotRequired[T.List[str]],
+ 'provides': NotRequired[T.List[Provide]],
+ 'requires': NotRequired[T.List[Require]],
+ }
+ )
+
+ class Description(TypedDict):
+
+ version: int
+ revision: int
+ rules: T.List[Rule]
+
+
CPP_IMPORT_RE = re.compile(r'\w*import ([a-zA-Z0-9]+);')
CPP_EXPORT_RE = re.compile(r'\w*export module ([a-zA-Z0-9]+);')
@@ -37,7 +75,7 @@ class DependencyScanner:
self.sources = self.target_data.sources
self.provided_by: T.Dict[str, str] = {}
self.exports: T.Dict[str, str] = {}
- self.needs: collections.defaultdict[str, T.List[str]] = collections.defaultdict(list)
+ self.imports: collections.defaultdict[str, T.List[str]] = collections.defaultdict(list)
self.sources_with_exports: T.List[str] = []
def scan_file(self, fname: str, lang: Literal['cpp', 'fortran']) -> None:
@@ -58,7 +96,7 @@ class DependencyScanner:
# In Fortran you have an using declaration also for the module
# you define in the same file. Prevent circular dependencies.
if needed not in modules_in_this_file:
- self.needs[fname].append(needed)
+ self.imports[fname].append(needed)
if export_match:
exported_module = export_match.group(1).lower()
assert exported_module not in modules_in_this_file
@@ -89,7 +127,7 @@ class DependencyScanner:
# submodule (a1:a2) a3 <- requires a1@a2.smod
#
# a3 does not depend on the a1 parent module directly, only transitively.
- self.needs[fname].append(parent_module_name_full)
+ self.imports[fname].append(parent_module_name_full)
def scan_cpp_file(self, fname: str) -> None:
fpath = pathlib.Path(fname)
@@ -98,7 +136,7 @@ class DependencyScanner:
export_match = CPP_EXPORT_RE.match(line)
if import_match:
needed = import_match.group(1)
- self.needs[fname].append(needed)
+ self.imports[fname].append(needed)
if export_match:
exported_module = export_match.group(1)
if exported_module in self.provided_by:
@@ -123,47 +161,44 @@ class DependencyScanner:
def scan(self) -> int:
for s, lang in self.sources:
self.scan_file(s, lang)
- with open(self.outfile, 'w', encoding='utf-8') as ofile:
- ofile.write('ninja_dyndep_version = 1\n')
- for src, lang in self.sources:
- objfilename = self.target_data.source2object[src]
- mods_and_submods_needed = []
- module_files_generated = []
- module_files_needed = []
- if src in self.sources_with_exports:
- module_files_generated.append(self.module_name_for(src, lang))
- if src in self.needs:
- for modname in self.needs[src]:
- if modname not in self.provided_by:
- # Nothing provides this module, we assume that it
- # comes from a dependency library somewhere and is
- # already built by the time this compilation starts.
- pass
- else:
- mods_and_submods_needed.append(modname)
-
- for modname in mods_and_submods_needed:
- provider_src = self.provided_by[modname]
- provider_modfile = self.module_name_for(provider_src, lang)
- # Prune self-dependencies
- if provider_src != src:
- module_files_needed.append(provider_modfile)
-
- quoted_objfilename = ninja_quote(objfilename, True)
- quoted_module_files_generated = [ninja_quote(x, True) for x in module_files_generated]
- quoted_module_files_needed = [ninja_quote(x, True) for x in module_files_needed]
- if quoted_module_files_generated:
- mod_gen = '| ' + ' '.join(quoted_module_files_generated)
- else:
- mod_gen = ''
- if quoted_module_files_needed:
- mod_dep = '| ' + ' '.join(quoted_module_files_needed)
- else:
- mod_dep = ''
- build_line = 'build {} {}: dyndep {}'.format(quoted_objfilename,
- mod_gen,
- mod_dep)
- ofile.write(build_line + '\n')
+ description: Description = {
+ 'version': 1,
+ 'revision': 0,
+ 'rules': [],
+ }
+ for src, lang in self.sources:
+ rule: Rule = {
+ 'primary-output': self.target_data.source2object[src],
+ 'requires': [],
+ 'provides': [],
+ }
+ if src in self.sources_with_exports:
+ rule['outputs'] = [self.module_name_for(src, lang)]
+ if src in self.imports:
+ for modname in self.imports[src]:
+ provider_src = self.provided_by.get(modname)
+ if provider_src == src:
+ continue
+ rule['requires'].append({
+ 'logical-name': modname,
+ })
+ if provider_src:
+ rule['requires'][-1].update({
+ 'source-path': provider_src,
+ 'compiled-module-path': self.module_name_for(provider_src, lang),
+ })
+ if src in self.exports:
+ modname = self.exports[src]
+ rule['provides'].append({
+ 'logical-name': modname,
+ 'source-path': src,
+ 'compiled-module-path': self.module_name_for(src, lang),
+ })
+ description['rules'].append(rule)
+
+ with open(self.outfile, 'w', encoding='utf-8') as f:
+ json.dump(description, f)
+
return 0
def run(args: T.List[str]) -> int: