diff options
author | Jussi Pakkanen <jpakkane@gmail.com> | 2023-09-12 17:50:13 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-12 17:50:13 +0300 |
commit | d2dfef5205f031ab10591899a1fc5048ea6f4134 (patch) | |
tree | 3e1be43920441f9167e9ee5498a357a1d378aec8 | |
parent | 1b6c9ad02ab88d92fd055ccc1307456105c064fe (diff) | |
parent | 14e35b63c02a16f69dd1ad5bde775e6868965eb2 (diff) | |
download | meson-d2dfef5205f031ab10591899a1fc5048ea6f4134.zip meson-d2dfef5205f031ab10591899a1fc5048ea6f4134.tar.gz meson-d2dfef5205f031ab10591899a1fc5048ea6f4134.tar.bz2 |
Merge pull request #12152 from bruchar1/ast-preserve-all
Preserve whitespaces and comments in AST
-rw-r--r-- | docs/markdown/Syntax.md | 2 | ||||
-rw-r--r-- | mesonbuild/ast/interpreter.py | 27 | ||||
-rw-r--r-- | mesonbuild/ast/introspection.py | 10 | ||||
-rw-r--r-- | mesonbuild/ast/printer.py | 251 | ||||
-rw-r--r-- | mesonbuild/ast/visitor.py | 26 | ||||
-rw-r--r-- | mesonbuild/cargo/builder.py | 22 | ||||
-rw-r--r-- | mesonbuild/cmake/interpreter.py | 18 | ||||
-rw-r--r-- | mesonbuild/coredata.py | 2 | ||||
-rw-r--r-- | mesonbuild/interpreter/interpreter.py | 6 | ||||
-rw-r--r-- | mesonbuild/interpreterbase/helpers.py | 4 | ||||
-rw-r--r-- | mesonbuild/interpreterbase/interpreterbase.py | 42 | ||||
-rw-r--r-- | mesonbuild/mintro.py | 6 | ||||
-rw-r--r-- | mesonbuild/mparser.py | 509 | ||||
-rw-r--r-- | mesonbuild/optinterpreter.py | 8 | ||||
-rw-r--r-- | mesonbuild/rewriter.py | 47 | ||||
-rw-r--r-- | run_format_tests.py | 1 | ||||
-rw-r--r-- | test cases/unit/118 rewrite/meson.build | 189 | ||||
-rw-r--r-- | unittests/rewritetests.py | 22 |
18 files changed, 920 insertions, 272 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md index b69ad00..59ec5f7 100644 --- a/docs/markdown/Syntax.md +++ b/docs/markdown/Syntax.md @@ -109,7 +109,7 @@ Strings in Meson are declared with single quotes. To enter a literal single quote do it like this: ```meson -single quote = 'contains a \' character' +single_quote = 'contains a \' character' ``` The full list of escape sequences is: diff --git a/mesonbuild/ast/interpreter.py b/mesonbuild/ast/interpreter.py index 9e098d0..c51af09 100644 --- a/mesonbuild/ast/interpreter.py +++ b/mesonbuild/ast/interpreter.py @@ -239,7 +239,7 @@ class AstInterpreter(InterpreterBase): def evaluate_dictstatement(self, node: mparser.DictNode) -> TYPE_nkwargs: def resolve_key(node: mparser.BaseNode) -> str: - if isinstance(node, mparser.StringNode): + if isinstance(node, mparser.BaseStringNode): return node.value return '__AST_UNKNOWN__' arguments, kwargs = self.reduce_arguments(node.args, key_resolver=resolve_key) @@ -254,10 +254,10 @@ class AstInterpreter(InterpreterBase): def evaluate_plusassign(self, node: PlusAssignmentNode) -> None: assert isinstance(node, PlusAssignmentNode) # Cheat by doing a reassignment - self.assignments[node.var_name] = node.value # Save a reference to the value node + self.assignments[node.var_name.value] = node.value # Save a reference to the value node if node.value.ast_id: self.reverse_assignment[node.value.ast_id] = node - self.assign_vals[node.var_name] = self.evaluate_statement(node.value) + self.assign_vals[node.var_name.value] = self.evaluate_statement(node.value) def evaluate_indexing(self, node: IndexNode) -> int: return 0 @@ -312,17 +312,17 @@ class AstInterpreter(InterpreterBase): for i in node.ifs: self.evaluate_codeblock(i.block) if not isinstance(node.elseblock, EmptyNode): - self.evaluate_codeblock(node.elseblock) + self.evaluate_codeblock(node.elseblock.block) def get_variable(self, varname: str) -> int: return 0 def assignment(self, node: AssignmentNode) -> None: assert isinstance(node, AssignmentNode) - self.assignments[node.var_name] = node.value # Save a reference to the value node + self.assignments[node.var_name.value] = node.value # Save a reference to the value node if node.value.ast_id: self.reverse_assignment[node.value.ast_id] = node - self.assign_vals[node.var_name] = self.evaluate_statement(node.value) # Evaluate the value just in case + self.assign_vals[node.var_name.value] = self.evaluate_statement(node.value) # Evaluate the value just in case def resolve_node(self, node: BaseNode, include_unknown_args: bool = False, id_loop_detect: T.Optional[T.List[str]] = None) -> T.Optional[T.Any]: def quick_resolve(n: BaseNode, loop_detect: T.Optional[T.List[str]] = None) -> T.Any: @@ -371,8 +371,8 @@ class AstInterpreter(InterpreterBase): elif isinstance(node, ArithmeticNode): if node.operation != 'add': return None # Only handle string and array concats - l = quick_resolve(node.left) - r = quick_resolve(node.right) + l = self.resolve_node(node.left, include_unknown_args, id_loop_detect) + r = self.resolve_node(node.right, include_unknown_args, id_loop_detect) if isinstance(l, str) and isinstance(r, str): result = l + r # String concatenation detected else: @@ -382,17 +382,18 @@ class AstInterpreter(InterpreterBase): src = quick_resolve(node.source_object) margs = self.flatten_args(node.args.arguments, include_unknown_args, id_loop_detect) mkwargs: T.Dict[str, TYPE_nvar] = {} + method_name = node.name.value try: if isinstance(src, str): - result = StringHolder(src, T.cast('Interpreter', self)).method_call(node.name, margs, mkwargs) + result = StringHolder(src, T.cast('Interpreter', self)).method_call(method_name, margs, mkwargs) elif isinstance(src, bool): - result = BooleanHolder(src, T.cast('Interpreter', self)).method_call(node.name, margs, mkwargs) + result = BooleanHolder(src, T.cast('Interpreter', self)).method_call(method_name, margs, mkwargs) elif isinstance(src, int): - result = IntegerHolder(src, T.cast('Interpreter', self)).method_call(node.name, margs, mkwargs) + result = IntegerHolder(src, T.cast('Interpreter', self)).method_call(method_name, margs, mkwargs) elif isinstance(src, list): - result = ArrayHolder(src, T.cast('Interpreter', self)).method_call(node.name, margs, mkwargs) + result = ArrayHolder(src, T.cast('Interpreter', self)).method_call(method_name, margs, mkwargs) elif isinstance(src, dict): - result = DictHolder(src, T.cast('Interpreter', self)).method_call(node.name, margs, mkwargs) + result = DictHolder(src, T.cast('Interpreter', self)).method_call(method_name, margs, mkwargs) except mesonlib.MesonException: return None diff --git a/mesonbuild/ast/introspection.py b/mesonbuild/ast/introspection.py index 987e355..1525a2d 100644 --- a/mesonbuild/ast/introspection.py +++ b/mesonbuild/ast/introspection.py @@ -27,7 +27,7 @@ from ..build import Executable, Jar, SharedLibrary, SharedModule, StaticLibrary from ..compilers import detect_compiler_for from ..interpreterbase import InvalidArguments from ..mesonlib import MachineChoice, OptionKey -from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, StringNode +from ..mparser import BaseNode, ArithmeticNode, ArrayNode, ElementaryNode, IdNode, FunctionNode, BaseStringNode from .interpreter import AstInterpreter if T.TYPE_CHECKING: @@ -128,7 +128,7 @@ class IntrospectionInterpreter(AstInterpreter): if not self.is_subproject() and 'subproject_dir' in kwargs: spdirname = kwargs['subproject_dir'] - if isinstance(spdirname, StringNode): + if isinstance(spdirname, BaseStringNode): assert isinstance(spdirname.value, str) self.subproject_dir = spdirname.value if not self.is_subproject(): @@ -174,7 +174,7 @@ class IntrospectionInterpreter(AstInterpreter): for l in self.flatten_args(raw_langs): if isinstance(l, str): langs.append(l) - elif isinstance(l, StringNode): + elif isinstance(l, BaseStringNode): langs.append(l.value) for lang in sorted(langs, key=compilers.sort_clink): @@ -261,9 +261,9 @@ class IntrospectionInterpreter(AstInterpreter): continue arg_nodes = arg_node.arguments.copy() # Pop the first element if the function is a build target function - if isinstance(curr, FunctionNode) and curr.func_name in BUILD_TARGET_FUNCTIONS: + if isinstance(curr, FunctionNode) and curr.func_name.value in BUILD_TARGET_FUNCTIONS: arg_nodes.pop(0) - elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, StringNode))] + elementary_nodes = [x for x in arg_nodes if isinstance(x, (str, BaseStringNode))] inqueue += [x for x in arg_nodes if isinstance(x, (FunctionNode, ArrayNode, IdNode, ArithmeticNode))] if elementary_nodes: res += [curr] diff --git a/mesonbuild/ast/printer.py b/mesonbuild/ast/printer.py index ebf63af..155b5fc 100644 --- a/mesonbuild/ast/printer.py +++ b/mesonbuild/ast/printer.py @@ -18,6 +18,8 @@ from __future__ import annotations from .. import mparser from .visitor import AstVisitor + +from itertools import zip_longest import re import typing as T @@ -84,7 +86,17 @@ class AstPrinter(AstVisitor): def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None: assert isinstance(node.value, str) - self.append("f'" + node.value + "'", node) + self.append("f'" + self.escape(node.value) + "'", node) + node.lineno = self.curr_line or node.lineno + + def visit_MultilineStringNode(self, node: mparser.StringNode) -> None: + assert isinstance(node.value, str) + self.append("'''" + node.value + "'''", node) + node.lineno = self.curr_line or node.lineno + + def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None: + assert isinstance(node.value, str) + self.append("f'''" + node.value + "'''", node) node.lineno = self.curr_line or node.lineno def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: @@ -152,30 +164,30 @@ class AstPrinter(AstVisitor): def visit_MethodNode(self, node: mparser.MethodNode) -> None: node.lineno = self.curr_line or node.lineno node.source_object.accept(self) - self.append('.' + node.name + '(', node) + self.append('.' + node.name.value + '(', node) node.args.accept(self) self.append(')', node) def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: node.lineno = self.curr_line or node.lineno - self.append(node.func_name + '(', node) + self.append(node.func_name.value + '(', node) node.args.accept(self) self.append(')', node) def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: node.lineno = self.curr_line or node.lineno - self.append(node.var_name + ' = ', node) + self.append(node.var_name.value + ' = ', node) node.value.accept(self) def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: node.lineno = self.curr_line or node.lineno - self.append(node.var_name + ' += ', node) + self.append(node.var_name.value + ' += ', node) node.value.accept(self) def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: node.lineno = self.curr_line or node.lineno self.append_padded('foreach', node) - self.append_padded(', '.join(node.varnames), node) + self.append_padded(', '.join(varname.value for varname in node.varnames), node) self.append_padded(':', node) node.items.accept(self) self.newline() @@ -238,6 +250,223 @@ class AstPrinter(AstVisitor): else: self.result = re.sub(r', $', '', self.result) +class RawPrinter(AstVisitor): + + def __init__(self): + self.result = '' + + def visit_default_func(self, node: mparser.BaseNode): + self.result += node.value + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_unary_operator(self, node: mparser.UnaryOperatorNode): + node.operator.accept(self) + node.value.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_binary_operator(self, node: mparser.BinaryOperatorNode): + node.left.accept(self) + node.operator.accept(self) + node.right.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_BooleanNode(self, node: mparser.BooleanNode) -> None: + self.result += 'true' if node.value else 'false' + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_NumberNode(self, node: mparser.NumberNode) -> None: + self.result += node.raw_value + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_StringNode(self, node: mparser.StringNode) -> None: + self.result += f"'{node.raw_value}'" + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_MultilineStringNode(self, node: mparser.MultilineStringNode) -> None: + self.result += f"'''{node.value}'''" + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None: + self.result += 'f' + self.visit_StringNode(node) + + def visit_MultilineFormatStringNode(self, node: mparser.MultilineFormatStringNode) -> None: + self.result += 'f' + self.visit_MultilineStringNode(node) + + def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: + self.result += 'continue' + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_BreakNode(self, node: mparser.BreakNode) -> None: + self.result += 'break' + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: + node.lbracket.accept(self) + node.args.accept(self) + node.rbracket.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_DictNode(self, node: mparser.DictNode) -> None: + node.lcurl.accept(self) + node.args.accept(self) + node.rcurl.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_ParenthesizedNode(self, node: mparser.ParenthesizedNode) -> None: + node.lpar.accept(self) + node.inner.accept(self) + node.rpar.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_OrNode(self, node: mparser.OrNode) -> None: + self.visit_binary_operator(node) + + def visit_AndNode(self, node: mparser.AndNode) -> None: + self.visit_binary_operator(node) + + def visit_ComparisonNode(self, node: mparser.ComparisonNode) -> None: + self.visit_binary_operator(node) + + def visit_ArithmeticNode(self, node: mparser.ArithmeticNode) -> None: + self.visit_binary_operator(node) + + def visit_NotNode(self, node: mparser.NotNode) -> None: + self.visit_unary_operator(node) + + def visit_CodeBlockNode(self, node: mparser.CodeBlockNode) -> None: + if node.pre_whitespaces: + node.pre_whitespaces.accept(self) + for i in node.lines: + i.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_IndexNode(self, node: mparser.IndexNode) -> None: + node.iobject.accept(self) + node.lbracket.accept(self) + node.index.accept(self) + node.rbracket.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_MethodNode(self, node: mparser.MethodNode) -> None: + node.source_object.accept(self) + node.dot.accept(self) + node.name.accept(self) + node.lpar.accept(self) + node.args.accept(self) + node.rpar.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: + node.func_name.accept(self) + node.lpar.accept(self) + node.args.accept(self) + node.rpar.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: + node.var_name.accept(self) + node.operator.accept(self) + node.value.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: + node.var_name.accept(self) + node.operator.accept(self) + node.value.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: + node.foreach_.accept(self) + for varname, comma in zip_longest(node.varnames, node.commas): + varname.accept(self) + if comma is not None: + comma.accept(self) + node.column.accept(self) + node.items.accept(self) + node.block.accept(self) + node.endforeach.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: + for i in node.ifs: + i.accept(self) + if not isinstance(node.elseblock, mparser.EmptyNode): + node.elseblock.accept(self) + node.endif.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_UMinusNode(self, node: mparser.UMinusNode) -> None: + self.visit_unary_operator(node) + + def visit_IfNode(self, node: mparser.IfNode) -> None: + node.if_.accept(self) + node.condition.accept(self) + node.block.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_ElseNode(self, node: mparser.ElseNode) -> None: + node.else_.accept(self) + node.block.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_TernaryNode(self, node: mparser.TernaryNode) -> None: + node.condition.accept(self) + node.questionmark.accept(self) + node.trueblock.accept(self) + node.column.accept(self) + node.falseblock.accept(self) + if node.whitespaces: + node.whitespaces.accept(self) + + def visit_ArgumentNode(self, node: mparser.ArgumentNode) -> None: + commas_iter = iter(node.commas) + + for arg in node.arguments: + arg.accept(self) + try: + comma = next(commas_iter) + comma.accept(self) + except StopIteration: + pass + + assert len(node.columns) == len(node.kwargs) + for (key, val), column in zip(node.kwargs.items(), node.columns): + key.accept(self) + column.accept(self) + val.accept(self) + try: + comma = next(commas_iter) + comma.accept(self) + except StopIteration: + pass + + if node.whitespaces: + node.whitespaces.accept(self) + class AstJSONPrinter(AstVisitor): def __init__(self) -> None: self.result: T.Dict[str, T.Any] = {} @@ -342,28 +571,28 @@ class AstJSONPrinter(AstVisitor): def visit_MethodNode(self, node: mparser.MethodNode) -> None: self._accept('object', node.source_object) self._accept('args', node.args) - self.current['name'] = node.name + self.current['name'] = node.name.value self.setbase(node) def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: self._accept('args', node.args) - self.current['name'] = node.func_name + self.current['name'] = node.func_name.value self.setbase(node) def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: self._accept('value', node.value) - self.current['var_name'] = node.var_name + self.current['var_name'] = node.var_name.value self.setbase(node) def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: self._accept('value', node.value) - self.current['var_name'] = node.var_name + self.current['var_name'] = node.var_name.value self.setbase(node) def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: self._accept('items', node.items) self._accept('block', node.block) - self.current['varnames'] = node.varnames + self.current['varnames'] = [varname.value for varname in node.varnames] self.setbase(node) def visit_IfClauseNode(self, node: mparser.IfClauseNode) -> None: diff --git a/mesonbuild/ast/visitor.py b/mesonbuild/ast/visitor.py index 8a0e77b..d05d3ff 100644 --- a/mesonbuild/ast/visitor.py +++ b/mesonbuild/ast/visitor.py @@ -43,12 +43,24 @@ class AstVisitor: def visit_FormatStringNode(self, node: mparser.FormatStringNode) -> None: self.visit_default_func(node) + def visit_MultilineStringNode(self, node: mparser.StringNode) -> None: + self.visit_default_func(node) + + def visit_FormatMultilineStringNode(self, node: mparser.FormatStringNode) -> None: + self.visit_default_func(node) + def visit_ContinueNode(self, node: mparser.ContinueNode) -> None: self.visit_default_func(node) def visit_BreakNode(self, node: mparser.BreakNode) -> None: self.visit_default_func(node) + def visit_SymbolNode(self, node: mparser.SymbolNode) -> None: + self.visit_default_func(node) + + def visit_WhitespaceNode(self, node: mparser.WhitespaceNode) -> None: + self.visit_default_func(node) + def visit_ArrayNode(self, node: mparser.ArrayNode) -> None: self.visit_default_func(node) node.args.accept(self) @@ -97,22 +109,28 @@ class AstVisitor: def visit_MethodNode(self, node: mparser.MethodNode) -> None: self.visit_default_func(node) node.source_object.accept(self) + node.name.accept(self) node.args.accept(self) def visit_FunctionNode(self, node: mparser.FunctionNode) -> None: self.visit_default_func(node) + node.func_name.accept(self) node.args.accept(self) def visit_AssignmentNode(self, node: mparser.AssignmentNode) -> None: self.visit_default_func(node) + node.var_name.accept(self) node.value.accept(self) def visit_PlusAssignmentNode(self, node: mparser.PlusAssignmentNode) -> None: self.visit_default_func(node) + node.var_name.accept(self) node.value.accept(self) def visit_ForeachClauseNode(self, node: mparser.ForeachClauseNode) -> None: self.visit_default_func(node) + for varname in node.varnames: + varname.accept(self) node.items.accept(self) node.block.accept(self) @@ -131,6 +149,10 @@ class AstVisitor: node.condition.accept(self) node.block.accept(self) + def visit_ElseNode(self, node: mparser.IfNode) -> None: + self.visit_default_func(node) + node.block.accept(self) + def visit_TernaryNode(self, node: mparser.TernaryNode) -> None: self.visit_default_func(node) node.condition.accept(self) @@ -144,3 +166,7 @@ class AstVisitor: for key, val in node.kwargs.items(): key.accept(self) val.accept(self) + + def visit_ParenthesizedNode(self, node: mparser.ParenthesizedNode) -> None: + self.visit_default_func(node) + node.inner.accept(self) diff --git a/mesonbuild/cargo/builder.py b/mesonbuild/cargo/builder.py index fb086d1..3f7f688 100644 --- a/mesonbuild/cargo/builder.py +++ b/mesonbuild/cargo/builder.py @@ -28,6 +28,10 @@ def _token(tid: str, filename: str, value: mparser.TV_TokenTypes) -> mparser.Tok return mparser.Token(tid, filename, -1, -1, -1, (-1, -1), value) +def _symbol(filename: str, val: str) -> mparser.SymbolNode: + return mparser.SymbolNode(_token('', filename, val)) + + def string(value: str, filename: str) -> mparser.StringNode: """Build A StringNode @@ -45,7 +49,7 @@ def number(value: int, filename: str) -> mparser.NumberNode: :param filename: the file that the value came from :return: A NumberNode """ - return mparser.NumberNode(_token('number', filename, value)) + return mparser.NumberNode(_token('number', filename, str(value))) def bool(value: builtins.bool, filename: str) -> mparser.BooleanNode: @@ -67,7 +71,7 @@ def array(value: T.List[mparser.BaseNode], filename: str) -> mparser.ArrayNode: """ args = mparser.ArgumentNode(_token('array', filename, 'unused')) args.arguments = value - return mparser.ArrayNode(args, -1, -1, -1, -1) + return mparser.ArrayNode(_symbol(filename, '['), args, _symbol(filename, ']')) def identifier(value: str, filename: str) -> mparser.IdNode: @@ -97,7 +101,7 @@ def method(name: str, id_: mparser.IdNode, args.arguments = pos if kw is not None: args.kwargs = {identifier(k, id_.filename): v for k, v in kw.items()} - return mparser.MethodNode(id_.filename, -1, -1, id_, name, args) + return mparser.MethodNode(id_, _symbol(id_.filename, '.'), identifier(name, id_.filename), _symbol(id_.filename, '('), args, _symbol(id_.filename, ')')) def function(name: str, filename: str, @@ -117,7 +121,7 @@ def function(name: str, filename: str, args.arguments = pos if kw is not None: args.kwargs = {identifier(k, filename): v for k, v in kw.items()} - return mparser.FunctionNode(filename, -1, -1, -1, -1, name, args) + return mparser.FunctionNode(identifier(name, filename), _symbol(filename, '('), args, _symbol(filename, ')')) def equal(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.ComparisonNode: @@ -127,7 +131,7 @@ def equal(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.ComparisonNod :param rhs: the right hand side of the equal :return: A compraison node """ - return mparser.ComparisonNode('==', lhs, rhs) + return mparser.ComparisonNode('==', lhs, _symbol(lhs.filename, '=='), rhs) def or_(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.OrNode: @@ -137,7 +141,7 @@ def or_(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.OrNode: :param rhs: The Right of the Node :return: The OrNode """ - return mparser.OrNode(lhs, rhs) + return mparser.OrNode(lhs, _symbol(lhs.filename, 'or'), rhs) def and_(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.AndNode: @@ -147,7 +151,7 @@ def and_(lhs: mparser.BaseNode, rhs: mparser.BaseNode) -> mparser.AndNode: :param rhs: The right of the And :return: The AndNode """ - return mparser.AndNode(lhs, rhs) + return mparser.AndNode(lhs, _symbol(lhs.filename, 'and'), rhs) def not_(value: mparser.BaseNode, filename: str) -> mparser.NotNode: @@ -157,7 +161,7 @@ def not_(value: mparser.BaseNode, filename: str) -> mparser.NotNode: :param filename: the string filename :return: The NotNode """ - return mparser.NotNode(_token('not', filename, ''), value) + return mparser.NotNode(_token('not', filename, ''), _symbol(filename, 'not'), value) def assign(value: mparser.BaseNode, varname: str, filename: str) -> mparser.AssignmentNode: @@ -168,7 +172,7 @@ def assign(value: mparser.BaseNode, varname: str, filename: str) -> mparser.Assi :param filename: The filename :return: An AssignmentNode """ - return mparser.AssignmentNode(filename, -1, -1, varname, value) + return mparser.AssignmentNode(identifier(varname, filename), _symbol(filename, '='), value) def block(filename: str) -> mparser.CodeBlockNode: diff --git a/mesonbuild/cmake/interpreter.py b/mesonbuild/cmake/interpreter.py index f88d091..2f7cb69 100644 --- a/mesonbuild/cmake/interpreter.py +++ b/mesonbuild/cmake/interpreter.py @@ -48,6 +48,7 @@ from ..mparser import ( IndexNode, MethodNode, NumberNode, + SymbolNode, ) @@ -959,14 +960,17 @@ class CMakeInterpreter: def token(tid: str = 'string', val: TYPE_mixed = '') -> Token: return Token(tid, self.subdir.as_posix(), 0, 0, 0, None, val) + def symbol(val: str) -> SymbolNode: + return SymbolNode(token('', val)) + def string(value: str) -> StringNode: - return StringNode(token(val=value)) + return StringNode(token(val=value), escape=False) def id_node(value: str) -> IdNode: return IdNode(token(val=value)) def number(value: int) -> NumberNode: - return NumberNode(token(val=value)) + return NumberNode(token(val=str(value))) def nodeify(value: TYPE_mixed_list) -> BaseNode: if isinstance(value, str): @@ -984,14 +988,14 @@ class CMakeInterpreter: raise RuntimeError('invalid type of value: {} ({})'.format(type(value).__name__, str(value))) def indexed(node: BaseNode, index: int) -> IndexNode: - return IndexNode(node, nodeify(index)) + return IndexNode(node, symbol('['), nodeify(index), symbol(']')) def array(elements: TYPE_mixed_list) -> ArrayNode: args = ArgumentNode(token()) if not isinstance(elements, list): elements = [args] args.arguments += [nodeify(x) for x in elements if x is not None] - return ArrayNode(args, 0, 0, 0, 0) + return ArrayNode(symbol('['), args, symbol(']')) def function(name: str, args: T.Optional[TYPE_mixed_list] = None, kwargs: T.Optional[TYPE_mixed_kwargs] = None) -> FunctionNode: args = [] if args is None else args @@ -1002,7 +1006,7 @@ class CMakeInterpreter: args = [args] args_n.arguments = [nodeify(x) for x in args if x is not None] args_n.kwargs = {id_node(k): nodeify(v) for k, v in kwargs.items() if v is not None} - func_n = FunctionNode(self.subdir.as_posix(), 0, 0, 0, 0, name, args_n) + func_n = FunctionNode(id_node(name), symbol('('), args_n, symbol(')')) return func_n def method(obj: BaseNode, name: str, args: T.Optional[TYPE_mixed_list] = None, kwargs: T.Optional[TYPE_mixed_kwargs] = None) -> MethodNode: @@ -1014,10 +1018,10 @@ class CMakeInterpreter: args = [args] args_n.arguments = [nodeify(x) for x in args if x is not None] args_n.kwargs = {id_node(k): nodeify(v) for k, v in kwargs.items() if v is not None} - return MethodNode(self.subdir.as_posix(), 0, 0, obj, name, args_n) + return MethodNode(obj, symbol('.'), id_node(name), symbol('('), args_n, symbol(')')) def assign(var_name: str, value: BaseNode) -> AssignmentNode: - return AssignmentNode(self.subdir.as_posix(), 0, 0, var_name, value) + return AssignmentNode(id_node(var_name), symbol('='), value) # Generate the root code block and the project function call root_cb = CodeBlockNode(token()) diff --git a/mesonbuild/coredata.py b/mesonbuild/coredata.py index f151c7b..f91c583 100644 --- a/mesonbuild/coredata.py +++ b/mesonbuild/coredata.py @@ -1098,7 +1098,7 @@ class MachineFileParser(): return section def _evaluate_statement(self, node: mparser.BaseNode) -> T.Union[str, bool, int, T.List[str]]: - if isinstance(node, (mparser.StringNode)): + if isinstance(node, (mparser.BaseStringNode)): return node.value elif isinstance(node, mparser.BooleanNode): return node.value diff --git a/mesonbuild/interpreter/interpreter.py b/mesonbuild/interpreter/interpreter.py index 9b005cb..c637087 100644 --- a/mesonbuild/interpreter/interpreter.py +++ b/mesonbuild/interpreter/interpreter.py @@ -536,7 +536,7 @@ class Interpreter(InterpreterBase, HoldableObject): assert isinstance(kw, mparser.IdNode), 'for mypy' if kw.value == 'meson_version': # mypy does not understand "and isinstance" - if isinstance(val, mparser.StringNode): + if isinstance(val, mparser.BaseStringNode): self.handle_meson_version(val.value, val) def get_build_def_files(self) -> mesonlib.OrderedSet[str]: @@ -2942,7 +2942,7 @@ class Interpreter(InterpreterBase, HoldableObject): def _add_global_arguments(self, node: mparser.FunctionNode, argsdict: T.Dict[str, T.List[str]], args: T.List[str], kwargs: 'kwtypes.FuncAddProjectArgs') -> None: if self.is_subproject(): - msg = f'Function \'{node.func_name}\' cannot be used in subprojects because ' \ + msg = f'Function \'{node.func_name.value}\' cannot be used in subprojects because ' \ 'there is no way to make that reliable.\nPlease only call ' \ 'this if is_subproject() returns false. Alternatively, ' \ 'define a variable that\ncontains your language-specific ' \ @@ -2962,7 +2962,7 @@ class Interpreter(InterpreterBase, HoldableObject): def _add_arguments(self, node: mparser.FunctionNode, argsdict: T.Dict[str, T.List[str]], args_frozen: bool, args: T.List[str], kwargs: 'kwtypes.FuncAddProjectArgs') -> None: if args_frozen: - msg = f'Tried to use \'{node.func_name}\' after a build target has been declared.\n' \ + msg = f'Tried to use \'{node.func_name.value}\' after a build target has been declared.\n' \ 'This is not permitted. Please declare all arguments before your targets.' raise InvalidCode(msg) diff --git a/mesonbuild/interpreterbase/helpers.py b/mesonbuild/interpreterbase/helpers.py index f2ee1b1..917969b 100644 --- a/mesonbuild/interpreterbase/helpers.py +++ b/mesonbuild/interpreterbase/helpers.py @@ -25,7 +25,7 @@ if T.TYPE_CHECKING: from .baseobjects import TYPE_var, TYPE_kwargs, SubProject def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var']: - if isinstance(args, mparser.StringNode): + if isinstance(args, mparser.BaseStringNode): assert isinstance(args.value, str) return [args.value] if not isinstance(args, collections.abc.Sequence): @@ -35,7 +35,7 @@ def flatten(args: T.Union['TYPE_var', T.List['TYPE_var']]) -> T.List['TYPE_var'] if isinstance(a, list): rest = flatten(a) result = result + rest - elif isinstance(a, mparser.StringNode): + elif isinstance(a, mparser.BaseStringNode): result.append(a.value) else: result.append(a) diff --git a/mesonbuild/interpreterbase/interpreterbase.py b/mesonbuild/interpreterbase/interpreterbase.py index 902f84a..ea6e37c 100644 --- a/mesonbuild/interpreterbase/interpreterbase.py +++ b/mesonbuild/interpreterbase/interpreterbase.py @@ -137,7 +137,7 @@ class InterpreterBase: if not self.ast.lines: raise InvalidCode('No statements in code.') first = self.ast.lines[0] - if not isinstance(first, mparser.FunctionNode) or first.func_name != 'project': + if not isinstance(first, mparser.FunctionNode) or first.func_name.value != 'project': p = pathlib.Path(self.source_root).resolve() found = p for parent in p.parents: @@ -192,12 +192,19 @@ class InterpreterBase: self.current_node = cur if isinstance(cur, mparser.FunctionNode): return self.function_call(cur) + elif isinstance(cur, mparser.PlusAssignmentNode): + self.evaluate_plusassign(cur) elif isinstance(cur, mparser.AssignmentNode): self.assignment(cur) elif isinstance(cur, mparser.MethodNode): return self.method_call(cur) - elif isinstance(cur, mparser.StringNode): - return self._holderify(cur.value) + elif isinstance(cur, mparser.BaseStringNode): + if isinstance(cur, mparser.MultilineFormatStringNode): + return self.evaluate_multiline_fstring(cur) + elif isinstance(cur, mparser.FormatStringNode): + return self.evaluate_fstring(cur) + else: + return self._holderify(cur.value) elif isinstance(cur, mparser.BooleanNode): return self._holderify(cur.value) elif isinstance(cur, mparser.IfClauseNode): @@ -224,21 +231,16 @@ class InterpreterBase: return self.evaluate_arithmeticstatement(cur) elif isinstance(cur, mparser.ForeachClauseNode): self.evaluate_foreach(cur) - elif isinstance(cur, mparser.PlusAssignmentNode): - self.evaluate_plusassign(cur) elif isinstance(cur, mparser.IndexNode): return self.evaluate_indexing(cur) elif isinstance(cur, mparser.TernaryNode): return self.evaluate_ternary(cur) - elif isinstance(cur, mparser.FormatStringNode): - if isinstance(cur, mparser.MultilineFormatStringNode): - return self.evaluate_multiline_fstring(cur) - else: - return self.evaluate_fstring(cur) elif isinstance(cur, mparser.ContinueNode): raise ContinueRequest() elif isinstance(cur, mparser.BreakNode): raise BreakRequest() + elif isinstance(cur, mparser.ParenthesizedNode): + return self.evaluate_statement(cur.inner) elif isinstance(cur, mparser.TestCaseClauseNode): return self.evaluate_testcase(cur) else: @@ -254,7 +256,7 @@ class InterpreterBase: @FeatureNew('dict', '0.47.0') def evaluate_dictstatement(self, cur: mparser.DictNode) -> InterpreterObject: def resolve_key(key: mparser.BaseNode) -> str: - if not isinstance(key, mparser.StringNode): + if not isinstance(key, mparser.BaseStringNode): FeatureNew.single_use('Dictionary entry using non literal key', '0.53.0', self.subproject) key_holder = self.evaluate_statement(key) if key_holder is None: @@ -301,7 +303,7 @@ class InterpreterBase: mesonlib.project_meson_versions[self.subproject] = prev_meson_version return None if not isinstance(node.elseblock, mparser.EmptyNode): - self.evaluate_codeblock(node.elseblock) + self.evaluate_codeblock(node.elseblock.block) return None def evaluate_testcase(self, node: mparser.TestCaseClauseNode) -> T.Optional[Disabler]: @@ -426,9 +428,7 @@ class InterpreterBase: return self.evaluate_fstring(node) @FeatureNew('format strings', '0.58.0') - def evaluate_fstring(self, node: mparser.FormatStringNode) -> InterpreterObject: - assert isinstance(node, mparser.FormatStringNode) - + def evaluate_fstring(self, node: T.Union[mparser.FormatStringNode, mparser.MultilineFormatStringNode]) -> InterpreterObject: def replace(match: T.Match[str]) -> str: var = str(match.group(1)) try: @@ -459,14 +459,14 @@ class InterpreterBase: if tsize is None: if isinstance(i, tuple): raise mesonlib.MesonBugException(f'Iteration of {items} returned a tuple even though iter_tuple_size() is None') - self.set_variable(node.varnames[0], self._holderify(i)) + self.set_variable(node.varnames[0].value, self._holderify(i)) else: if not isinstance(i, tuple): raise mesonlib.MesonBugException(f'Iteration of {items} did not return a tuple even though iter_tuple_size() is {tsize}') if len(i) != tsize: raise mesonlib.MesonBugException(f'Iteration of {items} did not return a tuple even though iter_tuple_size() is {tsize}') for j in range(tsize): - self.set_variable(node.varnames[j], self._holderify(i[j])) + self.set_variable(node.varnames[j].value, self._holderify(i[j])) try: self.evaluate_codeblock(node.block) except ContinueRequest: @@ -476,7 +476,7 @@ class InterpreterBase: def evaluate_plusassign(self, node: mparser.PlusAssignmentNode) -> None: assert isinstance(node, mparser.PlusAssignmentNode) - varname = node.var_name + varname = node.var_name.value addition = self.evaluate_statement(node.value) if addition is None: raise InvalidCodeOnVoid('plus assign') @@ -504,7 +504,7 @@ class InterpreterBase: return self._holderify(iobject.operator_call(MesonOperator.INDEX, index)) def function_call(self, node: mparser.FunctionNode) -> T.Optional[InterpreterObject]: - func_name = node.func_name + func_name = node.func_name.value (h_posargs, h_kwargs) = self.reduce_arguments(node.args) (posargs, kwargs) = self._unholder_args(h_posargs, h_kwargs) if is_disabled(posargs, kwargs) and func_name not in {'get_variable', 'set_variable', 'unset_variable', 'is_disabler'}: @@ -532,7 +532,7 @@ class InterpreterBase: else: object_display_name = invocable.__class__.__name__ obj = self.evaluate_statement(invocable) - method_name = node.name + method_name = node.name.value (h_args, h_kwargs) = self.reduce_arguments(node.args) (args, kwargs) = self._unholder_args(h_args, h_kwargs) if is_disabled(args, kwargs): @@ -628,7 +628,7 @@ class InterpreterBase: Tried to assign values inside an argument list. To specify a keyword argument, use : instead of =. ''')) - var_name = node.var_name + var_name = node.var_name.value if not isinstance(var_name, str): raise InvalidArguments('Tried to assign value to a non-variable.') value = self.evaluate_statement(node.value) diff --git a/mesonbuild/mintro.py b/mesonbuild/mintro.py index 0391535..7bd4205 100644 --- a/mesonbuild/mintro.py +++ b/mesonbuild/mintro.py @@ -36,7 +36,7 @@ from .dependencies import Dependency from . import environment from .interpreterbase import ObjectHolder from .mesonlib import OptionKey -from .mparser import FunctionNode, ArrayNode, ArgumentNode, StringNode +from .mparser import FunctionNode, ArrayNode, ArgumentNode, BaseStringNode if T.TYPE_CHECKING: import argparse @@ -187,14 +187,14 @@ def list_targets_from_source(intr: IntrospectionInterpreter) -> T.List[T.Dict[st args: T.List[BaseNode] = [] if isinstance(n, FunctionNode): args = list(n.args.arguments) - if n.func_name in BUILD_TARGET_FUNCTIONS: + if n.func_name.value in BUILD_TARGET_FUNCTIONS: args.pop(0) elif isinstance(n, ArrayNode): args = n.args.arguments elif isinstance(n, ArgumentNode): args = n.arguments for j in args: - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): assert isinstance(j.value, str) res += [Path(j.value)] elif isinstance(j, str): diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index fb4e433..0f63c9e 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -26,6 +26,8 @@ if T.TYPE_CHECKING: from .ast import AstVisitor + BaseNodeT = T.TypeVar('BaseNodeT', bound='BaseNode') + # This is the regex for the supported escape sequences of a regular string # literal, like 'abc\x00' ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' @@ -114,12 +116,12 @@ class Lexer: self.keywords.update({'testcase', 'endtestcase'}) self.token_specification = [ # Need to be sorted longest to shortest. - ('ignore', re.compile(r'[ \t]')), + ('whitespace', re.compile(r'[ \t]+')), ('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)), ('fstring', re.compile(r"f'([^'\\]|(\\.))*'")), ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')), ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')), - ('eol_cont', re.compile(r'\\\n')), + ('eol_cont', re.compile(r'\\[ \t]*(#.*)?\n')), ('eol', re.compile(r'\n')), ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)), ('comment', re.compile(r'#.*')), @@ -163,7 +165,7 @@ class Lexer: col = 0 while loc < len(self.code): matched = False - value: T.Union[str, bool, int] = None + value: str = '' for (tid, reg) in self.token_specification: mo = reg.match(self.code, loc) if mo: @@ -175,10 +177,8 @@ class Lexer: loc = mo.end() span_end = loc bytespan = (span_start, span_end) - match_text = mo.group() - if tid in {'ignore', 'comment'}: - break - elif tid == 'lparen': + value = mo.group() + if tid == 'lparen': par_count += 1 elif tid == 'rparen': par_count -= 1 @@ -193,48 +193,34 @@ class Lexer: elif tid == 'dblquote': raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col) elif tid in {'string', 'fstring'}: - # Handle here and not on the regexp to give a better error message. - if match_text.find("\n") != -1: + if value.find("\n") != -1: msg = ("Newline character in a string detected, use ''' (three single quotes) " "for multiline strings instead.\n" "This will become a hard error in a future Meson release.") mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename)) - value = match_text[2 if tid == 'fstring' else 1:-1] - value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value) + value = value[2 if tid == 'fstring' else 1:-1] elif tid in {'multiline_string', 'multiline_fstring'}: - # For multiline strings, parse out the value and pass - # through the normal string logic. - # For multiline format strings, we have to emit a - # different AST node so we can add a feature check, - # but otherwise, it follows the normal fstring logic. - if tid == 'multiline_string': - value = match_text[3:-3] - tid = 'string' - else: - value = match_text[4:-3] - lines = match_text.split('\n') + value = value[4 if tid == 'multiline_fstring' else 3:-3] + lines = value.split('\n') if len(lines) > 1: lineno += len(lines) - 1 line_start = mo.end() - len(lines[-1]) - elif tid == 'number': - value = int(match_text, base=0) elif tid == 'eol_cont': lineno += 1 line_start = loc - break + tid = 'whitespace' elif tid == 'eol': lineno += 1 line_start = loc if par_count > 0 or bracket_count > 0 or curl_count > 0: - break + tid = 'whitespace' elif tid == 'id': - if match_text in self.keywords: - tid = match_text + if value in self.keywords: + tid = value else: - if match_text in self.future_keywords: - mlog.warning(f"Identifier '{match_text}' will become a reserved keyword in a future release. Please rename it.", + if value in self.future_keywords: + mlog.warning(f"Identifier '{value}' will become a reserved keyword in a future release. Please rename it.", location=BaseNode(lineno, col, filename)) - value = match_text yield Token(tid, filename, curline_start, curline, col, bytespan, value) break if not matched: @@ -247,13 +233,16 @@ class BaseNode: filename: str = field(hash=False) end_lineno: int = field(hash=False) end_colno: int = field(hash=False) + whitespaces: T.Optional[WhitespaceNode] = field(hash=False) - def __init__(self, lineno: int, colno: int, filename: str, end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None: + def __init__(self, lineno: int, colno: int, filename: str, + end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None: self.lineno = lineno self.colno = colno self.filename = filename self.end_lineno = end_lineno if end_lineno is not None else lineno self.end_colno = end_colno if end_colno is not None else colno + self.whitespaces = None # Attributes for the visitors self.level = 0 @@ -267,6 +256,26 @@ class BaseNode: if callable(func): func(self) + def append_whitespaces(self, token: Token) -> None: + if self.whitespaces is None: + self.whitespaces = WhitespaceNode(token) + else: + self.whitespaces.append(token) + + +@dataclass(unsafe_hash=True) +class WhitespaceNode(BaseNode): + + value: str + + def __init__(self, token: Token[str]): + super().__init__(token.lineno, token.colno, token.filename) + self.value = '' + self.append(token) + + def append(self, token: Token[str]) -> None: + self.value += token.value + @dataclass(unsafe_hash=True) class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode): @@ -282,23 +291,44 @@ class BooleanNode(ElementaryNode[bool]): pass class IdNode(ElementaryNode[str]): - def __str__(self) -> str: - return "Id node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno) + pass +@dataclass(unsafe_hash=True) class NumberNode(ElementaryNode[int]): + + raw_value: str = field(hash=False) + + def __init__(self, token: Token[str]): + BaseNode.__init__(self, token.lineno, token.colno, token.filename) + self.raw_value = token.value + self.value = int(token.value, base=0) + self.bytespan = token.bytespan + +class BaseStringNode(ElementaryNode[str]): pass -class StringNode(ElementaryNode[str]): - def __str__(self) -> str: - return "String node: '%s' (%d, %d)." % (self.value, self.lineno, self.colno) +@dataclass(unsafe_hash=True) +class StringNode(BaseStringNode): + + raw_value: str = field(hash=False) -class FormatStringNode(ElementaryNode[str]): - def __str__(self) -> str: - return f"Format string node: '{self.value}' ({self.lineno}, {self.colno})." + def __init__(self, token: Token[str], escape: bool = True): + super().__init__(token) + self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, token.value) if escape else token.value + self.raw_value = token.value -class MultilineFormatStringNode(FormatStringNode): - def __str__(self) -> str: - return f"Multiline Format string node: '{self.value}' ({self.lineno}, {self.colno})." +class FormatStringNode(StringNode): + pass + +@dataclass(unsafe_hash=True) +class MultilineStringNode(BaseStringNode): + + def __init__(self, token: Token[str]): + super().__init__(token) + self.value = token.value + +class MultilineFormatStringNode(MultilineStringNode): + pass class ContinueNode(ElementaryNode): pass @@ -306,17 +336,22 @@ class ContinueNode(ElementaryNode): class BreakNode(ElementaryNode): pass +class SymbolNode(ElementaryNode[str]): + pass + @dataclass(unsafe_hash=True) class ArgumentNode(BaseNode): arguments: T.List[BaseNode] = field(hash=False) - commas: T.List[Token] = field(hash=False) + commas: T.List[SymbolNode] = field(hash=False) + columns: T.List[SymbolNode] = field(hash=False) kwargs: T.Dict[BaseNode, BaseNode] = field(hash=False) def __init__(self, token: Token[TV_TokenTypes]): super().__init__(token.lineno, token.colno, token.filename) self.arguments = [] self.commas = [] + self.columns = [] self.kwargs = {} self.order_error = False @@ -356,227 +391,277 @@ class ArgumentNode(BaseNode): @dataclass(unsafe_hash=True) class ArrayNode(BaseNode): + lbracket: SymbolNode args: ArgumentNode + rbracket: SymbolNode - def __init__(self, args: ArgumentNode, lineno: int, colno: int, end_lineno: int, end_colno: int): - super().__init__(lineno, colno, args.filename, end_lineno=end_lineno, end_colno=end_colno) + def __init__(self, lbracket: SymbolNode, args: ArgumentNode, rbracket: SymbolNode): + super().__init__(lbracket.lineno, lbracket.colno, args.filename, end_lineno=rbracket.lineno, end_colno=rbracket.colno+1) + self.lbracket = lbracket self.args = args + self.rbracket = rbracket @dataclass(unsafe_hash=True) class DictNode(BaseNode): + lcurl: SymbolNode args: ArgumentNode + rcurl: SymbolNode - def __init__(self, args: ArgumentNode, lineno: int, colno: int, end_lineno: int, end_colno: int): - super().__init__(lineno, colno, args.filename, end_lineno=end_lineno, end_colno=end_colno) + def __init__(self, lcurl: SymbolNode, args: ArgumentNode, rcurl: SymbolNode): + super().__init__(lcurl.lineno, lcurl.colno, args.filename, end_lineno=rcurl.lineno, end_colno=rcurl.colno+1) + self.lcurl = lcurl self.args = args + self.rcurl = rcurl class EmptyNode(BaseNode): pass @dataclass(unsafe_hash=True) -class OrNode(BaseNode): +class BinaryOperatorNode(BaseNode): left: BaseNode + operator: SymbolNode right: BaseNode - def __init__(self, left: BaseNode, right: BaseNode): + def __init__(self, left: BaseNode, operator: SymbolNode, right: BaseNode): super().__init__(left.lineno, left.colno, left.filename) self.left = left + self.operator = operator self.right = right -@dataclass(unsafe_hash=True) -class AndNode(BaseNode): - - left: BaseNode - right: BaseNode +class OrNode(BinaryOperatorNode): + pass - def __init__(self, left: BaseNode, right: BaseNode): - super().__init__(left.lineno, left.colno, left.filename) - self.left = left - self.right = right +class AndNode(BinaryOperatorNode): + pass @dataclass(unsafe_hash=True) -class ComparisonNode(BaseNode): +class ComparisonNode(BinaryOperatorNode): - left: BaseNode - right: BaseNode ctype: COMPARISONS - def __init__(self, ctype: COMPARISONS, left: BaseNode, right: BaseNode): - super().__init__(left.lineno, left.colno, left.filename) - self.left = left - self.right = right + def __init__(self, ctype: COMPARISONS, left: BaseNode, operator: SymbolNode, right: BaseNode): + super().__init__(left, operator, right) self.ctype = ctype @dataclass(unsafe_hash=True) -class ArithmeticNode(BaseNode): +class ArithmeticNode(BinaryOperatorNode): - left: BaseNode - right: BaseNode # TODO: use a Literal for operation operation: str - def __init__(self, operation: str, left: BaseNode, right: BaseNode): - super().__init__(left.lineno, left.colno, left.filename) - self.left = left - self.right = right + def __init__(self, operation: str, left: BaseNode, operator: SymbolNode, right: BaseNode): + super().__init__(left, operator, right) self.operation = operation - @dataclass(unsafe_hash=True) -class NotNode(BaseNode): +class UnaryOperatorNode(BaseNode): + operator: SymbolNode value: BaseNode - def __init__(self, token: Token[TV_TokenTypes], value: BaseNode): + def __init__(self, token: Token[TV_TokenTypes], operator: SymbolNode, value: BaseNode): super().__init__(token.lineno, token.colno, token.filename) + self.operator = operator self.value = value +class NotNode(UnaryOperatorNode): + pass + +class UMinusNode(UnaryOperatorNode): + pass + @dataclass(unsafe_hash=True) class CodeBlockNode(BaseNode): + pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False) lines: T.List[BaseNode] = field(hash=False) def __init__(self, token: Token[TV_TokenTypes]): super().__init__(token.lineno, token.colno, token.filename) + self.pre_whitespaces = None self.lines = [] + def append_whitespaces(self, token: Token) -> None: + if self.lines: + self.lines[-1].append_whitespaces(token) + elif self.pre_whitespaces is None: + self.pre_whitespaces = WhitespaceNode(token) + else: + self.pre_whitespaces.append(token) + @dataclass(unsafe_hash=True) class IndexNode(BaseNode): iobject: BaseNode + lbracket: SymbolNode index: BaseNode + rbracket: SymbolNode - def __init__(self, iobject: BaseNode, index: BaseNode): + def __init__(self, iobject: BaseNode, lbracket: SymbolNode, index: BaseNode, rbracket: SymbolNode): super().__init__(iobject.lineno, iobject.colno, iobject.filename) self.iobject = iobject + self.lbracket = lbracket self.index = index + self.rbracket = rbracket @dataclass(unsafe_hash=True) class MethodNode(BaseNode): source_object: BaseNode - name: str + dot: SymbolNode + name: IdNode + lpar: SymbolNode args: ArgumentNode + rpar: SymbolNode - def __init__(self, filename: str, lineno: int, colno: int, source_object: BaseNode, name: str, args: ArgumentNode): - super().__init__(lineno, colno, filename) + def __init__(self, source_object: BaseNode, dot: SymbolNode, name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode): + super().__init__(name.lineno, name.colno, name.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1) self.source_object = source_object + self.dot = dot self.name = name - assert isinstance(self.name, str) + self.lpar = lpar self.args = args + self.rpar = rpar @dataclass(unsafe_hash=True) class FunctionNode(BaseNode): - func_name: str + func_name: IdNode + lpar: SymbolNode args: ArgumentNode + rpar: SymbolNode - def __init__(self, filename: str, lineno: int, colno: int, end_lineno: int, end_colno: int, func_name: str, args: ArgumentNode): - super().__init__(lineno, colno, filename, end_lineno=end_lineno, end_colno=end_colno) + def __init__(self, func_name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode): + super().__init__(func_name.lineno, func_name.colno, func_name.filename, end_lineno=rpar.end_lineno, end_colno=rpar.end_colno+1) self.func_name = func_name - assert isinstance(func_name, str) + self.lpar = lpar self.args = args - + self.rpar = rpar @dataclass(unsafe_hash=True) class AssignmentNode(BaseNode): - var_name: str + var_name: IdNode + operator: SymbolNode value: BaseNode - def __init__(self, filename: str, lineno: int, colno: int, var_name: str, value: BaseNode): - super().__init__(lineno, colno, filename) + def __init__(self, var_name: IdNode, operator: SymbolNode, value: BaseNode): + super().__init__(var_name.lineno, var_name.colno, var_name.filename) self.var_name = var_name - assert isinstance(var_name, str) - self.value = value - - -@dataclass(unsafe_hash=True) -class PlusAssignmentNode(BaseNode): - - var_name: str - value: BaseNode - - def __init__(self, filename: str, lineno: int, colno: int, var_name: str, value: BaseNode): - super().__init__(lineno, colno, filename) - self.var_name = var_name - assert isinstance(var_name, str) + self.operator = operator self.value = value +class PlusAssignmentNode(AssignmentNode): + pass @dataclass(unsafe_hash=True) class ForeachClauseNode(BaseNode): - varnames: T.List[str] = field(hash=False) + foreach_: SymbolNode = field(hash=False) + varnames: T.List[IdNode] = field(hash=False) + commas: T.List[SymbolNode] = field(hash=False) + column: SymbolNode = field(hash=False) items: BaseNode block: CodeBlockNode + endforeach: SymbolNode = field(hash=False) - def __init__(self, token: Token, varnames: T.List[str], items: BaseNode, block: CodeBlockNode): - super().__init__(token.lineno, token.colno, token.filename) + def __init__(self, foreach_: SymbolNode, varnames: T.List[IdNode], commas: T.List[SymbolNode], column: SymbolNode, items: BaseNode, block: CodeBlockNode, endforeach: SymbolNode): + super().__init__(foreach_.lineno, foreach_.colno, foreach_.filename) + self.foreach_ = foreach_ self.varnames = varnames + self.commas = commas + self.column = column self.items = items self.block = block + self.endforeach = endforeach @dataclass(unsafe_hash=True) class IfNode(BaseNode): + if_: SymbolNode condition: BaseNode block: CodeBlockNode - def __init__(self, linenode: BaseNode, condition: BaseNode, block: CodeBlockNode): + def __init__(self, linenode: BaseNode, if_node: SymbolNode, condition: BaseNode, block: CodeBlockNode): super().__init__(linenode.lineno, linenode.colno, linenode.filename) + self.if_ = if_node self.condition = condition self.block = block +@dataclass(unsafe_hash=True) +class ElseNode(BaseNode): + + else_: SymbolNode + block: CodeBlockNode + + def __init__(self, else_: SymbolNode, block: CodeBlockNode): + super().__init__(block.lineno, block.colno, block.filename) + self.else_ = else_ + self.block = block @dataclass(unsafe_hash=True) class IfClauseNode(BaseNode): ifs: T.List[IfNode] = field(hash=False) - elseblock: T.Union[EmptyNode, CodeBlockNode] + elseblock: T.Union[EmptyNode, ElseNode] + endif: SymbolNode def __init__(self, linenode: BaseNode): super().__init__(linenode.lineno, linenode.colno, linenode.filename) self.ifs = [] - self.elseblock = None + self.elseblock = EmptyNode(linenode.lineno, linenode.colno, linenode.filename) + self.endif = None @dataclass(unsafe_hash=True) class TestCaseClauseNode(BaseNode): + testcase: SymbolNode condition: BaseNode block: CodeBlockNode + endtestcase: SymbolNode - def __init__(self, condition: BaseNode, block: CodeBlockNode): + def __init__(self, testcase: SymbolNode, condition: BaseNode, block: CodeBlockNode, endtestcase: SymbolNode): super().__init__(condition.lineno, condition.colno, condition.filename) + self.testcase = testcase self.condition = condition self.block = block - -@dataclass(unsafe_hash=True) -class UMinusNode(BaseNode): - - value: BaseNode - - def __init__(self, current_location: Token, value: BaseNode): - super().__init__(current_location.lineno, current_location.colno, current_location.filename) - self.value = value - + self.endtestcase = endtestcase @dataclass(unsafe_hash=True) class TernaryNode(BaseNode): condition: BaseNode + questionmark: SymbolNode trueblock: BaseNode + column: SymbolNode falseblock: BaseNode - def __init__(self, condition: BaseNode, trueblock: BaseNode, falseblock: BaseNode): + def __init__(self, condition: BaseNode, questionmark: SymbolNode, trueblock: BaseNode, column: SymbolNode, falseblock: BaseNode): super().__init__(condition.lineno, condition.colno, condition.filename) self.condition = condition + self.questionmark = questionmark self.trueblock = trueblock + self.column = column self.falseblock = falseblock + +@dataclass(unsafe_hash=True) +class ParenthesizedNode(BaseNode): + + lpar: SymbolNode = field(hash=False) + inner: BaseNode + rpar: SymbolNode = field(hash=False) + + def __init__(self, lpar: SymbolNode, inner: BaseNode, rpar: SymbolNode): + super().__init__(lpar.lineno, lpar.colno, inner.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1) + self.lpar = lpar + self.inner = inner + self.rpar = rpar + + if T.TYPE_CHECKING: COMPARISONS = Literal['==', '!=', '<', '<=', '>=', '>', 'in', 'notin'] @@ -611,12 +696,30 @@ class Parser: self.lexer = Lexer(code) self.stream = self.lexer.lex(filename) self.current: Token = Token('eof', '', 0, 0, 0, (0, 0), None) + self.previous = self.current + self.current_ws: T.List[Token] = [] + self.getsym() self.in_ternary = False + def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT: + node = node_type(*args, **kwargs) + for ws_token in self.current_ws: + node.append_whitespaces(ws_token) + self.current_ws = [] + return node + def getsym(self) -> None: + self.previous = self.current try: self.current = next(self.stream) + + while self.current.tid in {'eol', 'comment', 'whitespace'}: + self.current_ws.append(self.current) + if self.current.tid == 'eol': + break + self.current = next(self.stream) + except StopIteration: self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None) @@ -661,55 +764,75 @@ class Parser: def e1(self) -> BaseNode: left = self.e2() if self.accept('plusassign'): + operator = self.create_node(SymbolNode, self.previous) value = self.e1() if not isinstance(left, IdNode): raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno) assert isinstance(left.value, str) - return PlusAssignmentNode(left.filename, left.lineno, left.colno, left.value, value) + return self.create_node(PlusAssignmentNode, left, operator, value) elif self.accept('assign'): + operator = self.create_node(SymbolNode, self.previous) value = self.e1() if not isinstance(left, IdNode): raise ParseException('Assignment target must be an id.', self.getline(), left.lineno, left.colno) assert isinstance(left.value, str) - return AssignmentNode(left.filename, left.lineno, left.colno, left.value, value) + return self.create_node(AssignmentNode, left, operator, value) elif self.accept('questionmark'): if self.in_ternary: raise ParseException('Nested ternary operators are not allowed.', self.getline(), left.lineno, left.colno) + + qm_node = self.create_node(SymbolNode, self.previous) self.in_ternary = True trueblock = self.e1() self.expect('colon') + column_node = self.create_node(SymbolNode, self.previous) falseblock = self.e1() self.in_ternary = False - return TernaryNode(left, trueblock, falseblock) + return self.create_node(TernaryNode, left, qm_node, trueblock, column_node, falseblock) return left def e2(self) -> BaseNode: left = self.e3() while self.accept('or'): + operator = self.create_node(SymbolNode, self.previous) if isinstance(left, EmptyNode): raise ParseException('Invalid or clause.', self.getline(), left.lineno, left.colno) - left = OrNode(left, self.e3()) + left = self.create_node(OrNode, left, operator, self.e3()) return left def e3(self) -> BaseNode: left = self.e4() while self.accept('and'): + operator = self.create_node(SymbolNode, self.previous) if isinstance(left, EmptyNode): raise ParseException('Invalid and clause.', self.getline(), left.lineno, left.colno) - left = AndNode(left, self.e4()) + left = self.create_node(AndNode, left, operator, self.e4()) return left def e4(self) -> BaseNode: left = self.e5() for nodename, operator_type in comparison_map.items(): if self.accept(nodename): - return ComparisonNode(operator_type, left, self.e5()) - if self.accept('not') and self.accept('in'): - return ComparisonNode('notin', left, self.e5()) + operator = self.create_node(SymbolNode, self.previous) + return self.create_node(ComparisonNode, operator_type, left, operator, self.e5()) + if self.accept('not'): + ws = self.current_ws.copy() + not_token = self.previous + if self.accept('in'): + in_token = self.previous + self.current_ws = self.current_ws[len(ws):] # remove whitespaces between not and in + temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename) + for w in ws: + temp_node.append_whitespaces(w) + + not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1]) + not_token.value += temp_node.whitespaces.value + in_token.value + operator = self.create_node(SymbolNode, not_token) + return self.create_node(ComparisonNode, 'notin', left, operator, self.e5()) return left def e5(self) -> BaseNode: @@ -724,7 +847,8 @@ class Parser: while True: op = self.accept_any(tuple(op_map.keys())) if op: - left = ArithmeticNode(op_map[op], left, self.e5muldiv()) + operator = self.create_node(SymbolNode, self.previous) + left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e5muldiv()) else: break return left @@ -739,29 +863,34 @@ class Parser: while True: op = self.accept_any(tuple(op_map.keys())) if op: - left = ArithmeticNode(op_map[op], left, self.e6()) + operator = self.create_node(SymbolNode, self.previous) + left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e6()) else: break return left def e6(self) -> BaseNode: if self.accept('not'): - return NotNode(self.current, self.e7()) + operator = self.create_node(SymbolNode, self.previous) + return self.create_node(NotNode, self.current, operator, self.e7()) if self.accept('dash'): - return UMinusNode(self.current, self.e7()) + operator = self.create_node(SymbolNode, self.previous) + return self.create_node(UMinusNode, self.current, operator, self.e7()) return self.e7() def e7(self) -> BaseNode: left = self.e8() block_start = self.current if self.accept('lparen'): + lpar = self.create_node(SymbolNode, block_start) args = self.args() self.block_expect('rparen', block_start) + rpar = self.create_node(SymbolNode, self.previous) if not isinstance(left, IdNode): raise ParseException('Function call must be applied to plain id', self.getline(), left.lineno, left.colno) assert isinstance(left.value, str) - left = FunctionNode(left.filename, left.lineno, left.colno, self.current.lineno, self.current.colno, left.value, args) + left = self.create_node(FunctionNode, left, lpar, args, rpar) go_again = True while go_again: go_again = False @@ -776,17 +905,23 @@ class Parser: def e8(self) -> BaseNode: block_start = self.current if self.accept('lparen'): + lpar = self.create_node(SymbolNode, block_start) e = self.statement() self.block_expect('rparen', block_start) - return e + rpar = self.create_node(SymbolNode, self.previous) + return ParenthesizedNode(lpar, e, rpar) elif self.accept('lbracket'): + lbracket = self.create_node(SymbolNode, block_start) args = self.args() self.block_expect('rbracket', block_start) - return ArrayNode(args, block_start.lineno, block_start.colno, self.current.lineno, self.current.colno) + rbracket = self.create_node(SymbolNode, self.previous) + return self.create_node(ArrayNode, lbracket, args, rbracket) elif self.accept('lcurl'): + lcurl = self.create_node(SymbolNode, block_start) key_values = self.key_values() self.block_expect('rcurl', block_start) - return DictNode(key_values, block_start.lineno, block_start.colno, self.current.lineno, self.current.colno) + rcurl = self.create_node(SymbolNode, self.previous) + return self.create_node(DictNode, lcurl, key_values, rcurl) else: return self.e9() @@ -794,33 +929,35 @@ class Parser: t = self.current if self.accept('true'): t.value = True - return BooleanNode(t) + return self.create_node(BooleanNode, t) if self.accept('false'): t.value = False - return BooleanNode(t) + return self.create_node(BooleanNode, t) if self.accept('id'): - return IdNode(t) + return self.create_node(IdNode, t) if self.accept('number'): - return NumberNode(t) + return self.create_node(NumberNode, t) if self.accept('string'): - return StringNode(t) + return self.create_node(StringNode, t) if self.accept('fstring'): - return FormatStringNode(t) + return self.create_node(FormatStringNode, t) + if self.accept('multiline_string'): + return self.create_node(MultilineStringNode, t) if self.accept('multiline_fstring'): - return MultilineFormatStringNode(t) + return self.create_node(MultilineFormatStringNode, t) return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) def key_values(self) -> ArgumentNode: s = self.statement() - a = ArgumentNode(self.current) + a = self.create_node(ArgumentNode, self.current) while not isinstance(s, EmptyNode): if self.accept('colon'): + a.columns.append(self.create_node(SymbolNode, self.previous)) a.set_kwarg_no_check(s, self.statement()) - potential = self.current if not self.accept('comma'): return a - a.commas.append(potential) + a.commas.append(self.create_node(SymbolNode, self.previous)) else: raise ParseException('Only key:value pairs are valid in dict construction.', self.getline(), s.lineno, s.colno) @@ -829,22 +966,21 @@ class Parser: def args(self) -> ArgumentNode: s = self.statement() - a = ArgumentNode(self.current) + a = self.create_node(ArgumentNode, self.current) while not isinstance(s, EmptyNode): - potential = self.current if self.accept('comma'): - a.commas.append(potential) + a.commas.append(self.create_node(SymbolNode, self.previous)) a.append(s) elif self.accept('colon'): + a.columns.append(self.create_node(SymbolNode, self.previous)) if not isinstance(s, IdNode): raise ParseException('Dictionary key must be a plain identifier.', self.getline(), s.lineno, s.colno) a.set_kwarg(s, self.statement()) - potential = self.current if not self.accept('comma'): return a - a.commas.append(potential) + a.commas.append(self.create_node(SymbolNode, self.previous)) else: a.append(s) return a @@ -852,70 +988,87 @@ class Parser: return a def method_call(self, source_object: BaseNode) -> MethodNode: + dot = self.create_node(SymbolNode, self.previous) methodname = self.e9() if not isinstance(methodname, IdNode): + if isinstance(source_object, NumberNode) and isinstance(methodname, NumberNode): + raise ParseException('meson does not support float numbers', + self.getline(), source_object.lineno, source_object.colno) raise ParseException('Method name must be plain id', self.getline(), self.current.lineno, self.current.colno) assert isinstance(methodname.value, str) self.expect('lparen') + lpar = self.create_node(SymbolNode, self.previous) args = self.args() + rpar = self.create_node(SymbolNode, self.current) self.expect('rparen') - method = MethodNode(methodname.filename, methodname.lineno, methodname.colno, source_object, methodname.value, args) + method = self.create_node(MethodNode, source_object, dot, methodname, lpar, args, rpar) if self.accept('dot'): return self.method_call(method) return method def index_call(self, source_object: BaseNode) -> IndexNode: + lbracket = self.create_node(SymbolNode, self.previous) index_statement = self.statement() self.expect('rbracket') - return IndexNode(source_object, index_statement) + rbracket = self.create_node(SymbolNode, self.previous) + return self.create_node(IndexNode, source_object, lbracket, index_statement, rbracket) def foreachblock(self) -> ForeachClauseNode: - t = self.current + foreach_ = self.create_node(SymbolNode, self.previous) self.expect('id') - assert isinstance(t.value, str) - varname = t - varnames = [t.value] + assert isinstance(self.previous.value, str) + varnames = [self.create_node(IdNode, self.previous)] + commas = [] if self.accept('comma'): - t = self.current + commas.append(self.create_node(SymbolNode, self.previous)) self.expect('id') - assert isinstance(t.value, str) - varnames.append(t.value) + assert isinstance(self.previous.value, str) + varnames.append(self.create_node(IdNode, self.previous)) self.expect('colon') + column = self.create_node(SymbolNode, self.previous) items = self.statement() block = self.codeblock() - return ForeachClauseNode(varname, varnames, items, block) + endforeach = self.create_node(SymbolNode, self.current) + return self.create_node(ForeachClauseNode, foreach_, varnames, commas, column, items, block, endforeach) def ifblock(self) -> IfClauseNode: + if_node = self.create_node(SymbolNode, self.previous) condition = self.statement() - clause = IfClauseNode(condition) + clause = self.create_node(IfClauseNode, condition) self.expect('eol') block = self.codeblock() - clause.ifs.append(IfNode(clause, condition, block)) + clause.ifs.append(self.create_node(IfNode, clause, if_node, condition, block)) self.elseifblock(clause) clause.elseblock = self.elseblock() + clause.endif = self.create_node(SymbolNode, self.current) return clause def elseifblock(self, clause: IfClauseNode) -> None: while self.accept('elif'): + elif_ = self.create_node(SymbolNode, self.previous) s = self.statement() self.expect('eol') b = self.codeblock() - clause.ifs.append(IfNode(s, s, b)) + clause.ifs.append(self.create_node(IfNode, s, elif_, s, b)) - def elseblock(self) -> T.Union[CodeBlockNode, EmptyNode]: + def elseblock(self) -> T.Union[ElseNode, EmptyNode]: if self.accept('else'): + else_ = self.create_node(SymbolNode, self.previous) self.expect('eol') - return self.codeblock() + block = self.codeblock() + return ElseNode(else_, block) return EmptyNode(self.current.lineno, self.current.colno, self.current.filename) def testcaseblock(self) -> TestCaseClauseNode: + testcase = self.create_node(SymbolNode, self.previous) condition = self.statement() self.expect('eol') block = self.codeblock() - return TestCaseClauseNode(condition, block) + endtestcase = SymbolNode(self.current) + return self.create_node(TestCaseClauseNode, testcase, condition, block, endtestcase) def line(self) -> BaseNode: block_start = self.current @@ -930,9 +1083,9 @@ class Parser: self.block_expect('endforeach', block_start) return forblock if self.accept('continue'): - return ContinueNode(self.current) + return self.create_node(ContinueNode, self.current) if self.accept('break'): - return BreakNode(self.current) + return self.create_node(BreakNode, self.current) if self.lexer.in_unit_test and self.accept('testcase'): block = self.testcaseblock() self.block_expect('endtestcase', block_start) @@ -940,15 +1093,29 @@ class Parser: return self.statement() def codeblock(self) -> CodeBlockNode: - block = CodeBlockNode(self.current) + block = self.create_node(CodeBlockNode, self.current) cond = True + try: while cond: + for ws_token in self.current_ws: + block.append_whitespaces(ws_token) + self.current_ws = [] + curline = self.line() + if not isinstance(curline, EmptyNode): block.lines.append(curline) + cond = self.accept('eol') + except ParseException as e: e.ast = block raise + + # Remaining whitespaces will not be catched since there are no more nodes + for ws_token in self.current_ws: + block.append_whitespaces(ws_token) + self.current_ws = [] + return block diff --git a/mesonbuild/optinterpreter.py b/mesonbuild/optinterpreter.py index 8ad84aa..895cada 100644 --- a/mesonbuild/optinterpreter.py +++ b/mesonbuild/optinterpreter.py @@ -113,7 +113,9 @@ class OptionInterpreter: def reduce_single(self, arg: T.Union[str, mparser.BaseNode]) -> 'TYPE_var': if isinstance(arg, str): return arg - elif isinstance(arg, (mparser.StringNode, mparser.BooleanNode, + if isinstance(arg, mparser.ParenthesizedNode): + return self.reduce_single(arg.inner) + elif isinstance(arg, (mparser.BaseStringNode, mparser.BooleanNode, mparser.NumberNode)): return arg.value elif isinstance(arg, mparser.ArrayNode): @@ -121,7 +123,7 @@ class OptionInterpreter: elif isinstance(arg, mparser.DictNode): d = {} for k, v in arg.args.kwargs.items(): - if not isinstance(k, mparser.StringNode): + if not isinstance(k, mparser.BaseStringNode): raise OptionException('Dictionary keys must be a string literal') d[k.value] = self.reduce_single(v) return d @@ -162,7 +164,7 @@ class OptionInterpreter: def evaluate_statement(self, node: mparser.BaseNode) -> None: if not isinstance(node, mparser.FunctionNode): raise OptionException('Option file may only contain option definitions') - func_name = node.func_name + func_name = node.func_name.value if func_name != 'option': raise OptionException('Only calls to option() are allowed in option files.') (posargs, kwargs) = self.reduce_arguments(node.args) diff --git a/mesonbuild/rewriter.py b/mesonbuild/rewriter.py index a9b2e88..a86b97e 100644 --- a/mesonbuild/rewriter.py +++ b/mesonbuild/rewriter.py @@ -28,7 +28,7 @@ from .ast import IntrospectionInterpreter, BUILD_TARGET_FUNCTIONS, AstConditionL from mesonbuild.mesonlib import MesonException, setup_vsenv from . import mlog, environment from functools import wraps -from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode +from .mparser import Token, ArrayNode, ArgumentNode, AssignmentNode, BaseStringNode, BooleanNode, ElementaryNode, IdNode, FunctionNode, StringNode, SymbolNode import json, os, re, sys import typing as T @@ -104,6 +104,9 @@ class RequiredKeys: return wrapped +def _symbol(val: str) -> SymbolNode: + return SymbolNode(Token('', '', 0, 0, 0, (0, 0), val)) + class MTypeBase: def __init__(self, node: T.Optional[BaseNode] = None): if node is None: @@ -189,7 +192,7 @@ class MTypeList(MTypeBase): super().__init__(node) def _new_node(self): - return ArrayNode(ArgumentNode(Token('', '', 0, 0, 0, None, '')), 0, 0, 0, 0) + return ArrayNode(_symbol('['), ArgumentNode(Token('', '', 0, 0, 0, None, '')), _symbol(']')) def _new_element_node(self, value): # Overwrite in derived class @@ -267,12 +270,12 @@ class MTypeStrList(MTypeList): return StringNode(Token('', '', 0, 0, 0, None, str(value))) def _check_is_equal(self, node, value) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return node.value == value return False def _check_regex_matches(self, node, regex: str) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return re.match(regex, node.value) is not None return False @@ -292,7 +295,7 @@ class MTypeIDList(MTypeList): return False def _check_regex_matches(self, node, regex: str) -> bool: - if isinstance(node, StringNode): + if isinstance(node, BaseStringNode): return re.match(regex, node.value) is not None return False @@ -420,7 +423,7 @@ class Rewriter: if target in self.interpreter.assignments: node = self.interpreter.assignments[target] if isinstance(node, FunctionNode): - if node.func_name in {'executable', 'jar', 'library', 'shared_library', 'shared_module', 'static_library', 'both_libraries'}: + if node.func_name.value in {'executable', 'jar', 'library', 'shared_library', 'shared_module', 'static_library', 'both_libraries'}: tgt = self.interpreter.assign_vals[target] return tgt @@ -440,7 +443,7 @@ class Rewriter: if dependency in self.interpreter.assignments: node = self.interpreter.assignments[dependency] if isinstance(node, FunctionNode): - if node.func_name == 'dependency': + if node.func_name.value == 'dependency': name = self.interpreter.flatten_args(node.args)[0] dep = check_list(name) @@ -630,7 +633,7 @@ class Rewriter: args = [] if isinstance(n, FunctionNode): args = list(n.args.arguments) - if n.func_name in BUILD_TARGET_FUNCTIONS: + if n.func_name.value in BUILD_TARGET_FUNCTIONS: args.pop(0) elif isinstance(n, ArrayNode): args = n.args.arguments @@ -652,7 +655,7 @@ class Rewriter: src_list = [] for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): src_list += [j.value] # Generate the new String nodes @@ -686,7 +689,7 @@ class Rewriter: def find_node(src): for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): if j.value == src: return i, j return None, None @@ -728,7 +731,7 @@ class Rewriter: node = tgt_function.args.kwargs[extra_files_key] except StopIteration: # Target has no extra_files kwarg, create one - node = ArrayNode(ArgumentNode(Token('', tgt_function.filename, 0, 0, 0, None, '[]')), tgt_function.end_lineno, tgt_function.end_colno, tgt_function.end_lineno, tgt_function.end_colno) + node = ArrayNode(_symbol('['), ArgumentNode(Token('', tgt_function.filename, 0, 0, 0, None, '[]')), _symbol(']')) tgt_function.args.kwargs[IdNode(Token('string', tgt_function.filename, 0, 0, 0, None, 'extra_files'))] = node mark_array = False if tgt_function not in self.modified_nodes: @@ -745,7 +748,7 @@ class Rewriter: extra_files_list = [] for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): extra_files_list += [j.value] # Generate the new String nodes @@ -776,7 +779,7 @@ class Rewriter: def find_node(src): for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): if j.value == src: return i, j return None, None @@ -812,17 +815,17 @@ class Rewriter: # Build src list src_arg_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, '')) - src_arr_node = ArrayNode(src_arg_node, 0, 0, 0, 0) + src_arr_node = ArrayNode(_symbol('['), src_arg_node, _symbol(']')) src_far_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, '')) - src_fun_node = FunctionNode(filename, 0, 0, 0, 0, 'files', src_far_node) - src_ass_node = AssignmentNode(filename, 0, 0, source_id, src_fun_node) + src_fun_node = FunctionNode(IdNode(Token('id', filename, 0, 0, 0, (0, 0), 'files')), _symbol('('), src_far_node, _symbol(')')) + src_ass_node = AssignmentNode(IdNode(Token('id', filename, 0, 0, 0, (0, 0), source_id)), _symbol('='), src_fun_node) src_arg_node.arguments = [StringNode(Token('string', filename, 0, 0, 0, None, x)) for x in cmd['sources']] src_far_node.arguments = [src_arr_node] # Build target tgt_arg_node = ArgumentNode(Token('string', filename, 0, 0, 0, None, '')) - tgt_fun_node = FunctionNode(filename, 0, 0, 0, 0, cmd['target_type'], tgt_arg_node) - tgt_ass_node = AssignmentNode(filename, 0, 0, target_id, tgt_fun_node) + tgt_fun_node = FunctionNode(IdNode(Token('id', filename, 0, 0, 0, (0, 0), cmd['target_type'])), _symbol('('), tgt_arg_node, _symbol(')')) + tgt_ass_node = AssignmentNode(IdNode(Token('id', filename, 0, 0, 0, (0, 0), target_id)), _symbol('='), tgt_fun_node) tgt_arg_node.arguments = [ StringNode(Token('string', filename, 0, 0, 0, None, cmd['target'])), IdNode(Token('string', filename, 0, 0, 0, None, source_id)) @@ -845,12 +848,12 @@ class Rewriter: src_list = [] for i in target['sources']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): src_list += [j.value] extra_files_list = [] for i in target['extra_files']: for j in arg_list_from_node(i): - if isinstance(j, StringNode): + if isinstance(j, BaseStringNode): extra_files_list += [j.value] test_data = { 'name': target['name'], @@ -865,8 +868,8 @@ class Rewriter: alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)] path_sorter = lambda key: ([(key.count('/') <= idx, alphanum_key(x)) for idx, x in enumerate(key.split('/'))]) - unknown = [x for x in i.arguments if not isinstance(x, StringNode)] - sources = [x for x in i.arguments if isinstance(x, StringNode)] + unknown = [x for x in i.arguments if not isinstance(x, BaseStringNode)] + sources = [x for x in i.arguments if isinstance(x, BaseStringNode)] sources = sorted(sources, key=lambda x: path_sorter(x.value)) i.arguments = unknown + sources diff --git a/run_format_tests.py b/run_format_tests.py index 1f41f3d..1bf997c 100644 --- a/run_format_tests.py +++ b/run_format_tests.py @@ -63,6 +63,7 @@ def check_format() -> None: 'work area', '.eggs', '_cache', # e.g. .mypy_cache 'venv', # virtualenvs have DOS line endings + '118 rewrite', # we explicitly test for tab in meson.build file } for (root, _, filenames) in os.walk('.'): if any([x in root for x in skip_dirs]): diff --git a/test cases/unit/118 rewrite/meson.build b/test cases/unit/118 rewrite/meson.build new file mode 100644 index 0000000..7d0330b --- /dev/null +++ b/test cases/unit/118 rewrite/meson.build @@ -0,0 +1,189 @@ +# This file should expose all possible meson syntaxes + # and ensure the AstInterpreter and RawPrinter are able + + # to parse and write a file identical to the original. + + project ( # project comment 1 + # project comment 2 + 'rewrite' , # argument comment + # project comment 3 + 'cpp', + 'c', + default_options: [ + 'unity=on', + 'unity_size=50', # number of cpp / unity. default is 4... + 'warning_level=2', # eqv to /W3 + 'werror=true', # treat warnings as errors + 'b_ndebug=if-release', # disable assert in Release + 'cpp_eh=a', # /EHa exception handling + 'cpp_std=c++17', + 'cpp_winlibs=' + ','.join([ # array comment + # in array + # comment + 'kernel32.lib', + 'user32.lib', + 'gdi32.lib', + 'winspool.lib', + 'comdlg32.lib', + 'advapi32.lib', + 'shell32.lib' + # before comma comment + , + # after comma comment + 'ole32.lib', + 'oleaut32.lib', + 'uuid.lib', + 'odbc32.lib', + 'odbccp32.lib', + 'Delayimp.lib', # For delay loaded dll + 'OLDNAMES.lib', + 'dbghelp.lib', + 'psapi.lib', + ]), + ], + meson_version: '>=1.2', + version: '1.0.0', + ) # project comment 4 + +cppcoro_dep = dependency('andreasbuhr-cppcoro-cppcoro') +cppcoro = declare_dependency( + dependencies: [cppcoro_dep.partial_dependency( + includes: true, + link_args: true, + links: true, + sources: true, + )], + # '/await:strict' allows to use <coroutine> rather than <experimental/coroutine> with C++17. + # We can remove '/await:strict' once we update to C++20. + compile_args: ['/await:strict'], + # includes:true doesn't work for now in partial_dependency() + # This line could be removed once https://github.com/mesonbuild/meson/pull/10122 is released. + include_directories: cppcoro_dep.get_variable('includedir1'), +) + + +if get_option('unicode') #if comment +#if comment 2 + mfc=cpp_compiler.find_library(get_option('debug')?'mfc140ud':'mfc140u') + # if comment 3 +else#elsecommentnowhitespaces + # else comment 1 + mfc = cpp_compiler.find_library( get_option( 'debug' ) ? 'mfc140d' : 'mfc140') +# else comment 2 +endif #endif comment + + +assert(1 in [1, 2], '''1 should be in [1, 2]''') +assert(3 not in [1, 2], '''3 shouldn't be in [1, 2]''') +assert(not (3 in [1, 2]), '''3 shouldn't be in [1, 2]''') + +assert('b' in ['a', 'b'], ''''b' should be in ['a', 'b']''') +assert('c' not in ['a', 'b'], ''''c' shouldn't be in ['a', 'b']''') + +assert(exe1 in [exe1, exe2], ''''exe1 should be in [exe1, exe2]''') +assert(exe3 not in [exe1, exe2], ''''exe3 shouldn't be in [exe1, exe2]''') + +assert('a' in {'a': 'b'}, '''1 should be in {'a': 'b'}''') +assert('b'not in{'a':'b'}, '''1 should be in {'a': 'b'}''') + +assert('a'in'abc') +assert('b' not in 'def') + + +w = 'world' +d = {'a': 1, 'b': 0b10101010, 'c': 'pi', 'd': '''a +b +c''', 'e': f'hello @w@', 'f': f'''triple + formatted + string # this is not a comment + hello @w@ +''', 'g': [1, 2, 3], + + 'h' # comment a + : # comment b +0xDEADBEEF # comment c +, # comment d +'hh': 0xfeedc0de, # lowercase hexa +'hhh': 0XaBcD0123, # mixed case hexa +'oo': 0O123456, # upper O octa +'bb': 0B1111, # upper B binary +'i': {'aa': 11, # this is a comment + 'bb': 22}, # a comment inside a dict +'o': 0o754, +'m': -12, # minus number +'eq': 1 + 3 - 3 % 4 + -( 7 * 8 ), +} # end of dict comment + +hw = d['e'] +one = d['g'][0] + w += '!' + + +components = { + 'foo': ['foo.c'], + 'bar': ['bar.c'], + 'baz': ['baz.c'], # this line is indented with a tab! +} + +# compute a configuration based on system dependencies, custom logic +conf = configuration_data() +conf.set('USE_FOO', 1) + +# Determine the sources to compile +sources_to_compile = [] +foreach name, sources : components + if conf.get('USE_@0@'.format(name.to_upper()), 0) == 1 + sources_to_compile += sources + endif +endforeach + + +items = ['a', 'continue', 'b', 'break', 'c'] +result = [] +foreach i : items + if i == 'continue' + continue + elif i == 'break' + break + endif + result += i +endforeach +# result is ['a', 'b'] + + + +if a and b + # do something +endif +if c or d + # do something +endif +if not e + # do something +endif +if not (f or g) + # do something +endif + +single_quote = 'contains a \' character' +string_escapes = '\\\'\a\b\f\n\r\t\v\046\x26\u2D4d\U00002d4d\N{GREEK CAPITAL LETTER DELTA}' +no_string_escapes = '''\\\'\a\b\f\n\r\t\v\046\x26\u2D4d\U00002d4d\N{GREEK CAPITAL LETTER DELTA}''' + +# FIXME: is it supposed to work? (cont_eol inside string) +# cont_string = 'blablabla\ +# blablabla' + +# cont_eol with whitespace and comments after +if a \ # comment in cont 1 + and b \ # comment in cont 2 + or c # comment in cont 3 + message('ok') +endif + +if a \ + or b + debug('help!') +endif + + +# End of file comment with no linebreak
\ No newline at end of file diff --git a/unittests/rewritetests.py b/unittests/rewritetests.py index ca30fe9..c338844 100644 --- a/unittests/rewritetests.py +++ b/unittests/rewritetests.py @@ -13,11 +13,15 @@ # limitations under the License. import subprocess +from itertools import zip_longest import json import os +from pathlib import Path import shutil import unittest +from mesonbuild.ast import IntrospectionInterpreter, AstIDGenerator +from mesonbuild.ast.printer import RawPrinter from mesonbuild.mesonlib import windows_proof_rmtree from .baseplatformtests import BasePlatformTests @@ -396,3 +400,21 @@ class RewriterTests(BasePlatformTests): # Check the written file out = self.rewrite(self.builddir, os.path.join(self.builddir, 'info.json')) self.assertDictEqual(out, expected) + + def test_raw_printer_is_idempotent(self): + test_path = Path(self.unit_test_dir, '118 rewrite') + meson_build_file = test_path / 'meson.build' + # original_contents = meson_build_file.read_bytes() + original_contents = meson_build_file.read_text(encoding='utf-8') + + interpreter = IntrospectionInterpreter(test_path, '', 'ninja', visitors = [AstIDGenerator()]) + interpreter.analyze() + + printer = RawPrinter() + interpreter.ast.accept(printer) + # new_contents = printer.result.encode('utf-8') + new_contents = printer.result + + # Do it line per line because it is easier to debug like that + for orig_line, new_line in zip_longest(original_contents.splitlines(), new_contents.splitlines()): + self.assertEqual(orig_line, new_line) |