aboutsummaryrefslogtreecommitdiff
path: root/libcxx/test/libcxx-03/transitive_includes/to_csv.py
blob: 69d94deedf6f5076ba89f099113b4fad337ec92c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

from typing import List, Tuple, Optional
import argparse
import io
import itertools
import os
import pathlib
import re
import sys

# Make the `libcxx` Python package importable: `libcxx_root` is the directory four
# levels above this file's absolute path, and its `utils` subdirectory is appended
# to the module search path before importing `Header` from it.
libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(os.path.join(libcxx_root, "utils"))
from libcxx.header_information import Header

def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    A valid line starts with one or more periods, followed by a single space
    and the name of the file being included. Since `.` does not match a
    newline, a trailing line terminator is not part of the returned name.

    Returns the inclusion level and the raw file name being included.

    Raises ValueError if the line does not start with that pattern.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # Use a builtin exception type: no `ArgumentError` name is defined or
        # imported in this file, so raising it would only produce a NameError
        # and hide the message below.
        raise ValueError(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    dots, filename = match.groups()
    return (len(dots), filename)

def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Strip everything up to and including the `c++/v<N>/` directory from a header path.

    Returns the remainder of the path (i.e. the header relative to
    <whatever>/c++/v<N>, where <N> is one or more digits), or None if the path
    contains no such directory. When the pattern occurs several times, the
    last occurrence wins because the leading part of the match is greedy.

    This is the heuristic used to decide which headers are libc++ headers.
    """
    # Path separators are either a forward slash or, on Windows, a backslash
    # that Clang prints escaped as two consecutive backslashes. Each of those
    # two backslashes must itself be escaped in the regular expression, hence
    # the four backslashes inside the raw string.
    separator = r"(?:/|\\\\)"
    pattern = r"^.*c\+\+" + separator + r"v[0-9]+" + separator + r"(.+)$"
    found = re.match(pattern, header)
    return found.group(1) if found else None

def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Collect the transitive includes recorded in a --trace-includes output file.

    Every line of the file is parsed; lines that do not name a libc++ header
    are ignored. Returns a list of (top-level includer, included header)
    pairs, in the order in which they appear in the file.
    """
    pairs = []
    top_level = None
    for raw_line in file.readlines():
        (depth, path) = parse_line(raw_line)
        libcxx_path = make_cxx_v1_relative(path)

        # Anything that is not a libc++ header is irrelevant here.
        if libcxx_path is None:
            continue

        if depth == 1:
            # A header at the first level is the one including the headers that
            # follow it. There's usually exactly one such header, except if the
            # compiler is passed a file with `-include`.
            top_level = Header(libcxx_path)
        else:
            # Any deeper header is recorded as being included by the most
            # recent top-level includer.
            assert top_level is not None
            pairs.append((top_level, Header(libcxx_path)))
    return pairs

def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Write the transitive includes to standard output as space-delimited CSV.

    Output is grouped by includer, in sorted order; under each includer, every
    distinct included header is printed once, also in sorted order. Only
    public libc++ headers that are not C compatibility headers are printed.
    """
    def includer_of(pair):
        return pair[0]

    # groupby() only merges adjacent items, so the pairs must be sorted by the
    # same key first.
    ordered = sorted(includes, key=includer_of)
    for (includer, group) in itertools.groupby(ordered, key=includer_of):
        # De-duplicate the headers included by this includer before sorting them.
        unique_includees = {pair[1] for pair in group}
        for includee in sorted(unique_includees):
            if not includee.is_public() or includee.is_C_compatibility():
                continue
            print(f"{includer} {includee}")

def main(argv):
    """
    Command-line entry point.

    `argv` is the list of command-line arguments, without the program name.
    Each positional argument names a file containing --trace-includes output
    (argparse opens them for reading). The transitive includes found in all
    of them are combined and printed as space-delimited CSV.
    """
    parser = argparse.ArgumentParser(
        description="""
        Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.

        Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
        information for this script to run.

        The output of this script is provided in space-delimited CSV format where each line contains:

            <header performing inclusion> <header being included>
        """)
    parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+', default=None,
        help="One or more files containing the result of --trace-includes")
    args = parser.parse_args(argv)

    try:
        includes = [pair for file in args.inputs for pair in parse_file(file)]
    finally:
        # argparse.FileType opens the inputs but never closes them, so release
        # them here even if parsing fails. The pseudo-argument "-" yields
        # sys.stdin, which is not ours to close.
        for file in args.inputs:
            if file is not sys.stdin:
                file.close()
    print_csv(includes)

# Only run when executed as a script; the program name (argv[0]) is not
# forwarded to main().
if __name__ == "__main__":
    main(sys.argv[1:])