diff options
author | Dylan Baker <dylan@pnwbakers.com> | 2022-02-24 14:18:14 -0800 |
---|---|---|
committer | Dylan Baker <dylan@pnwbakers.com> | 2023-06-07 19:20:30 -0700 |
commit | 4017dab4847da392f7eb1dcdc2cb07bd69eb7863 (patch) | |
tree | 049f62df4e0898a35b1069e4243748f7d37b8c9e | |
parent | 5e59e5a9e49e9eee4e551f4078f32edd67659da4 (diff) | |
download | meson-4017dab4847da392f7eb1dcdc2cb07bd69eb7863.zip meson-4017dab4847da392f7eb1dcdc2cb07bd69eb7863.tar.gz meson-4017dab4847da392f7eb1dcdc2cb07bd69eb7863.tar.bz2 |
cargo/cfg: Add a parser for the rust/cargo cfg() expressions
This uses a lexer and a recursive descent parser to create an IR from cfg()
expressions, which is then converted into Meson AST (mparser) nodes.
-rw-r--r-- | mesonbuild/cargo/cfg.py | 276 | ||||
-rwxr-xr-x | run_unittests.py | 2 | ||||
-rw-r--r-- | unittests/cargotests.py | 125 |
3 files changed, 402 insertions, 1 deletions
diff --git a/mesonbuild/cargo/cfg.py b/mesonbuild/cargo/cfg.py new file mode 100644 index 0000000..ed6fd53 --- /dev/null +++ b/mesonbuild/cargo/cfg.py @@ -0,0 +1,276 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright © 2022-2023 Intel Corporation + +"""Rust CFG parser. + +Rust uses its `cfg()` format in cargo. + +This may have the following functions: + - all() + - any() + - not() + +And additionally is made up of `identifier [ = str]`. Where the str is optional, +so you could have examples like: +``` +[target.`cfg(unix)`.dependencies] +[target.'cfg(target_arch = "x86_64")'.dependencies] +[target.'cfg(all(target_arch = "x86_64", target_arch = "x86"))'.dependencies] +``` +""" + +from __future__ import annotations +import dataclasses +import enum +import functools +import typing as T + + +from . import builder +from .. import mparser +from ..mesonlib import MesonBugException + +if T.TYPE_CHECKING: + _T = T.TypeVar('_T') + _LEX_TOKEN = T.Tuple['TokenType', T.Optional[str]] + _LEX_STREAM = T.Iterable[_LEX_TOKEN] + _LEX_STREAM_AH = T.Iterator[T.Tuple[_LEX_TOKEN, T.Optional[_LEX_TOKEN]]] + + +class TokenType(enum.Enum): + + LPAREN = enum.auto() + RPAREN = enum.auto() + STRING = enum.auto() + IDENTIFIER = enum.auto() + ALL = enum.auto() + ANY = enum.auto() + NOT = enum.auto() + COMMA = enum.auto() + EQUAL = enum.auto() + + +def lexer(raw: str) -> _LEX_STREAM: + """Lex a cfg() expression. 
+ + :param raw: The raw cfg() expression + :return: An iterable of tokens + """ + buffer: T.List[str] = [] + is_string: bool = False + for s in raw: + if s.isspace() or s in {')', '(', ',', '='} or (s == '"' and buffer): + val = ''.join(buffer) + buffer.clear() + if is_string: + yield (TokenType.STRING, val) + elif val == 'any': + yield (TokenType.ANY, None) + elif val == 'all': + yield (TokenType.ALL, None) + elif val == 'not': + yield (TokenType.NOT, None) + elif val: + yield (TokenType.IDENTIFIER, val) + + if s == '(': + yield (TokenType.LPAREN, None) + continue + elif s == ')': + yield (TokenType.RPAREN, None) + continue + elif s == ',': + yield (TokenType.COMMA, None) + continue + elif s == '=': + yield (TokenType.EQUAL, None) + continue + elif s.isspace(): + continue + + if s == '"': + is_string = not is_string + else: + buffer.append(s) + if buffer: + # This should always be an identifier + yield (TokenType.IDENTIFIER, ''.join(buffer)) + + +def lookahead(iter: T.Iterator[_T]) -> T.Iterator[T.Tuple[_T, T.Optional[_T]]]: + """Get the current value of the iterable, and the next if possible. 
+ + :param iter: The iterable to look into + :yield: A tuple of the current value, and, if possible, the next + :return: nothing + """ + current: _T + next_: T.Optional[_T] + try: + next_ = next(iter) + except StopIteration: + # This is an empty iterator, there's nothing to look ahead to + return + + while True: + current = next_ + try: + next_ = next(iter) + except StopIteration: + next_ = None + + yield current, next_ + + if next_ is None: + break + + +@dataclasses.dataclass +class IR: + + """Base IR node for Cargo CFG.""" + + filename: str + +@dataclasses.dataclass +class String(IR): + + value: str + + +@dataclasses.dataclass +class Identifier(IR): + + value: str + + +@dataclasses.dataclass +class Equal(IR): + + lhs: IR + rhs: IR + + +@dataclasses.dataclass +class Any(IR): + + args: T.List[IR] + + +@dataclasses.dataclass +class All(IR): + + args: T.List[IR] + + +@dataclasses.dataclass +class Not(IR): + + value: IR + + +def _parse(ast: _LEX_STREAM_AH, filename: str) -> IR: + (token, value), n_stream = next(ast) + if n_stream is not None: + ntoken, _ = n_stream + else: + ntoken, _ = (None, None) + + stream: T.List[_LEX_TOKEN] + if token is TokenType.IDENTIFIER: + if ntoken is TokenType.EQUAL: + return Equal(filename, Identifier(filename, value), _parse(ast, filename)) + if token is TokenType.STRING: + return String(filename, value) + if token is TokenType.EQUAL: + # In this case the previous caller already has handled the equal + return _parse(ast, filename) + if token in {TokenType.ANY, TokenType.ALL}: + type_ = All if token is TokenType.ALL else Any + assert ntoken is TokenType.LPAREN + next(ast) # advance the iterator to get rid of the LPAREN + stream = [] + args: T.List[IR] = [] + while token is not TokenType.RPAREN: + (token, value), _ = next(ast) + if token is TokenType.COMMA: + args.append(_parse(lookahead(iter(stream)), filename)) + stream.clear() + else: + stream.append((token, value)) + if stream: + args.append(_parse(lookahead(iter(stream)), filename)) 
+ return type_(filename, args) + if token is TokenType.NOT: + next(ast) # advance the iterator to get rid of the LPAREN + stream = [] + # Mypy can't figure out that token is overridden inside the while loop + while token is not TokenType.RPAREN: # type: ignore + (token, value), _ = next(ast) + stream.append((token, value)) + return Not(filename, _parse(lookahead(iter(stream)), filename)) + + raise MesonBugException(f'Unhandled Cargo token: {token}') + + +def parse(ast: _LEX_STREAM, filename: str) -> IR: + """Parse the tokenized list into Meson AST. + + :param ast: An iterable of Tokens + :param filename: The name of the file being parsed + :return: An mparser Node to be used as a conditional + """ + ast_i: _LEX_STREAM_AH = lookahead(iter(ast)) + return _parse(ast_i, filename) + + +@functools.singledispatch +def ir_to_meson(ir: T.Any) -> mparser.BaseNode: + raise NotImplementedError + + +@ir_to_meson.register +def _(ir: String) -> mparser.BaseNode: + return builder.string(ir.value, ir.filename) + + +@ir_to_meson.register +def _(ir: Identifier) -> mparser.BaseNode: + host_machine = builder.identifier('host_machine', ir.filename) + if ir.value == "target_arch": + return builder.method('cpu_family', host_machine) + elif ir.value in {"target_os", "target_family"}: + return builder.method('system', host_machine) + elif ir.value == "target_endian": + return builder.method('endian', host_machine) + raise MesonBugException(f"Unhandled Cargo identifier: {ir.value}") + + +@ir_to_meson.register +def _(ir: Equal) -> mparser.BaseNode: + return builder.equal(ir_to_meson(ir.lhs), ir_to_meson(ir.rhs)) + + +@ir_to_meson.register +def _(ir: Not) -> mparser.BaseNode: + return builder.not_(ir_to_meson(ir.value), ir.filename) + + +@ir_to_meson.register +def _(ir: Any) -> mparser.BaseNode: + args = iter(reversed(ir.args)) + last = next(args) + cur = builder.or_(ir_to_meson(next(args)), ir_to_meson(last)) + for a in args: + cur = builder.or_(ir_to_meson(a), cur) + return cur + + 
+@ir_to_meson.register +def _(ir: All) -> mparser.BaseNode: + args = iter(reversed(ir.args)) + last = next(args) + cur = builder.and_(ir_to_meson(next(args)), ir_to_meson(last)) + for a in args: + cur = builder.and_(ir_to_meson(a), cur) + return cur diff --git a/run_unittests.py b/run_unittests.py index a820acc..4dd674d 100755 --- a/run_unittests.py +++ b/run_unittests.py @@ -36,7 +36,7 @@ from mesonbuild.mesonlib import python_command, setup_vsenv import mesonbuild.modules.pkgconfig from unittests.allplatformstests import AllPlatformTests -from unittests.cargotests import CargoVersionTest +from unittests.cargotests import CargoVersionTest, CargoCfgTest from unittests.darwintests import DarwinTests from unittests.failuretests import FailureTests from unittests.linuxcrosstests import LinuxCrossArmTests, LinuxCrossMingwTests diff --git a/unittests/cargotests.py b/unittests/cargotests.py index 884052b..61b64b1 100644 --- a/unittests/cargotests.py +++ b/unittests/cargotests.py @@ -5,6 +5,8 @@ from __future__ import annotations import unittest import typing as T +from mesonbuild.cargo import builder, cfg +from mesonbuild.cargo.cfg import TokenType from mesonbuild.cargo.version import convert @@ -59,3 +61,126 @@ class CargoVersionTest(unittest.TestCase): with self.subTest(): self.assertListEqual(convert(data), expected) + +class CargoCfgTest(unittest.TestCase): + + def test_lex(self) -> None: + cases: T.List[T.Tuple[str, T.List[T.Tuple[TokenType, T.Optional[str]]]]] = [ + ('"unix"', [(TokenType.STRING, 'unix')]), + ('unix', [(TokenType.IDENTIFIER, 'unix')]), + ('not(unix)', [ + (TokenType.NOT, None), + (TokenType.LPAREN, None), + (TokenType.IDENTIFIER, 'unix'), + (TokenType.RPAREN, None), + ]), + ('any(unix, windows)', [ + (TokenType.ANY, None), + (TokenType.LPAREN, None), + (TokenType.IDENTIFIER, 'unix'), + (TokenType.COMMA, None), + (TokenType.IDENTIFIER, 'windows'), + (TokenType.RPAREN, None), + ]), + ('target_arch = "x86_64"', [ + (TokenType.IDENTIFIER, 
'target_arch'), + (TokenType.EQUAL, None), + (TokenType.STRING, 'x86_64'), + ]), + ('all(target_arch = "x86_64", unix)', [ + (TokenType.ALL, None), + (TokenType.LPAREN, None), + (TokenType.IDENTIFIER, 'target_arch'), + (TokenType.EQUAL, None), + (TokenType.STRING, 'x86_64'), + (TokenType.COMMA, None), + (TokenType.IDENTIFIER, 'unix'), + (TokenType.RPAREN, None), + ]), + ] + for data, expected in cases: + with self.subTest(): + self.assertListEqual(list(cfg.lexer(data)), expected) + + def test_parse(self) -> None: + cases = [ + ('target_os = "windows"', cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "windows"))), + ('target_arch = "x86"', cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86"))), + ('target_family = "unix"', cfg.Equal('', cfg.Identifier('', "target_family"), cfg.String('', "unix"))), + ('any(target_arch = "x86", target_arch = "x86_64")', + cfg.Any( + '', [ + cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")), + cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86_64")), + ])), + ('all(target_arch = "x86", target_os = "linux")', + cfg.All( + '', [ + cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")), + cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "linux")), + ])), + ('not(all(target_arch = "x86", target_os = "linux"))', + cfg.Not( + '', + cfg.All( + '', [ + cfg.Equal('', cfg.Identifier('', "target_arch"), cfg.String('', "x86")), + cfg.Equal('', cfg.Identifier('', "target_os"), cfg.String('', "linux")), + ]))), + ] + for data, expected in cases: + with self.subTest(): + self.assertEqual(cfg.parse(iter(cfg.lexer(data)), ''), expected) + + def test_ir_to_meson(self) -> None: + HOST_MACHINE = builder.identifier('host_machine', '') + + cases = [ + ('target_os = "windows"', + builder.equal(builder.method('system', HOST_MACHINE), + builder.string('windows', ''))), + ('target_arch = "x86"', + builder.equal(builder.method('cpu_family', HOST_MACHINE), + 
builder.string('x86', ''))), + ('target_family = "unix"', + builder.equal(builder.method('system', HOST_MACHINE), + builder.string('unix', ''))), + ('not(target_arch = "x86")', + builder.not_(builder.equal( + builder.method('cpu_family', HOST_MACHINE), + builder.string('x86', '')), '')), + ('any(target_arch = "x86", target_arch = "x86_64")', + builder.or_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86', '')), + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86_64', '')))), + ('any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")', + builder.or_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86', '')), + builder.or_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86_64', '')), + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('aarch64', ''))))), + ('all(target_arch = "x86", target_arch = "x86_64")', + builder.and_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86', '')), + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86_64', '')))), + ('all(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")', + builder.and_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86', '')), + builder.and_( + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('x86_64', '')), + builder.equal(builder.method('cpu_family', HOST_MACHINE), + builder.string('aarch64', ''))))), + ] + for data, expected in cases: + with self.subTest(): + value = cfg.ir_to_meson(cfg.parse(iter(cfg.lexer(data)), '')) + self.assertEqual(value, expected) |