# Copyright 2014-2017 The Meson development team

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

from dataclasses import dataclass, field
import re
import codecs
import os
import typing as T

from .mesonlib import MesonException
from . import mlog

if T.TYPE_CHECKING:
    from typing_extensions import Literal

    from .ast import AstVisitor

    BaseNodeT = T.TypeVar('BaseNodeT', bound='BaseNode')

# This is the regex for the supported escape sequences of a regular string
# literal, like 'abc\x00'
ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
    ( \\U[A-Fa-f0-9]{8}   # 8-digit hex escapes
    | \\u[A-Fa-f0-9]{4}   # 4-digit hex escapes
    | \\x[A-Fa-f0-9]{2}   # 2-digit hex escapes
    | \\[0-7]{1,3}        # Octal escapes
    | \\N\{[^}]+\}        # Unicode characters by name
    | \\[\\'abfnrtv]      # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_match(match: T.Match[str]) -> str:
    """Return the character(s) denoted by one matched escape sequence.

    Used as the replacement callback for ESCAPE_SEQUENCE_SINGLE_RE.sub().
    """
    return codecs.decode(match.group(0).encode(), 'unicode_escape')


class ParseException(MesonException):
    """Error raised by the lexer or parser at a specific source position.

    ``ast`` is filled in by the parser with the partial code block that had
    been built when the error occurred.
    """

    ast: T.Optional[CodeBlockNode] = None

    def __init__(self, text: str, line: str, lineno: int, colno: int) -> None:
        # Format as error message, followed by the line with the error, followed by a caret to show the error column.
        super().__init__(mlog.code_line(text, line, colno))
        self.lineno = lineno
        self.colno = colno


class BlockParseException(ParseException):
    """ParseException that also reports where the unterminated block started."""

    def __init__(
        self,
        text: str,
        line: str,
        lineno: int,
        colno: int,
        start_line: str,
        start_lineno: int,
        start_colno: int,
    ) -> None:
        # This can be formatted in two ways - one if the block start and end are on the same line, and a different way if they are on different lines.

        if lineno == start_lineno:
            # If block start and end are on the same line, it is formatted as:
            # Error message
            # Followed by the line with the error
            # Followed by a caret to show the block start
            # Followed by underscores
            # Followed by a caret to show the block end.
            MesonException.__init__(self, "{}\n{}\n{}".format(text, line, '{}^{}^'.format(' ' * start_colno, '_' * (colno - start_colno - 1))))
        else:
            # If block start and end are on different lines, it is formatted as:
            # Error message
            # Followed by the line with the error
            # Followed by a caret to show the error column.
            # Followed by a message saying where the block started.
            # Followed by the line of the block start.
            # Followed by a caret for the block start.
            MesonException.__init__(self, "%s\n%s\n%s\nFor a block that started at %d,%d\n%s\n%s" % (text, line, '%s^' % (' ' * colno), start_lineno, start_colno, start_line, "%s^" % (' ' * start_colno)))
        self.lineno = lineno
        self.colno = colno


TV_TokenTypes = T.TypeVar('TV_TokenTypes', int, str, bool)


@dataclass(eq=False)
class Token(T.Generic[TV_TokenTypes]):
    """A single lexed token.

    Equality compares only the token id (``tid``); a Token may be compared
    either with another Token or directly with a tid string.
    """

    tid: str
    filename: str
    line_start: int
    lineno: int
    colno: int
    bytespan: T.Tuple[int, int]
    value: TV_TokenTypes

    def __eq__(self, other: object) -> bool:
        if isinstance(other, str):
            return self.tid == other
        elif isinstance(other, Token):
            return self.tid == other.tid
        return NotImplemented


class Lexer:
    """Splits source code into a stream of Token objects."""

    def __init__(self, code: str):
        self.code = code
        self.keywords = {'true', 'false', 'if', 'else', 'elif',
                         'endif', 'and', 'or', 'not', 'foreach', 'endforeach',
                         'in', 'continue', 'break'}
        self.future_keywords = {'return'}
        self.in_unit_test = 'MESON_RUNNING_IN_PROJECT_TESTS' in os.environ
        if self.in_unit_test:
            self.keywords.update({'testcase', 'endtestcase'})
        self.token_specification = [
            # Need to be sorted longest to shortest.
            ('whitespace', re.compile(r'[ \t]+')),
            ('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)),
            ('fstring', re.compile(r"f'([^'\\]|(\\.))*'")),
            ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
            ('number', re.compile(r'0[bB][01]+|0[oO][0-7]+|0[xX][0-9a-fA-F]+|0|[1-9]\d*')),
            ('eol_cont', re.compile(r'\\[ \t]*(#.*)?\n')),
            ('eol', re.compile(r'\n')),
            ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
            ('comment', re.compile(r'#.*')),
            ('lparen', re.compile(r'\(')),
            ('rparen', re.compile(r'\)')),
            ('lbracket', re.compile(r'\[')),
            ('rbracket', re.compile(r'\]')),
            ('lcurl', re.compile(r'\{')),
            ('rcurl', re.compile(r'\}')),
            ('dblquote', re.compile(r'"')),
            ('string', re.compile(r"'([^'\\]|(\\.))*'")),
            ('comma', re.compile(r',')),
            ('plusassign', re.compile(r'\+=')),
            ('dot', re.compile(r'\.')),
            ('plus', re.compile(r'\+')),
            ('dash', re.compile(r'-')),
            ('star', re.compile(r'\*')),
            ('percent', re.compile(r'%')),
            ('fslash', re.compile(r'/')),
            ('colon', re.compile(r':')),
            ('equal', re.compile(r'==')),
            ('nequal', re.compile(r'!=')),
            ('assign', re.compile(r'=')),
            ('le', re.compile(r'<=')),
            ('lt', re.compile(r'<')),
            ('ge', re.compile(r'>=')),
            ('gt', re.compile(r'>')),
            ('questionmark', re.compile(r'\?')),
        ]

    def getline(self, line_start: int) -> str:
        """Return the source line beginning at offset ``line_start``.

        The returned text excludes the terminating newline.
        """
        line_end = self.code.find('\n', line_start)
        if line_end == -1:
            # The last line has no trailing newline: take everything up to
            # the end of the code. Slicing with -1 would drop the last char.
            line_end = len(self.code)
        return self.code[line_start:line_end]

    def lex(self, filename: str) -> T.Generator[Token, None, None]:
        """Yield the tokens of ``self.code`` in source order.

        Newlines inside (), [] or {} are emitted as 'whitespace' tokens, as
        are line continuations. Identifiers that are keywords get the keyword
        itself as token id. String tokens carry their content without the
        surrounding quotes.

        Raises ParseException on a double quote or when no token matches.
        """
        line_start = 0
        lineno = 1
        loc = 0
        par_count = 0
        bracket_count = 0
        curl_count = 0
        col = 0
        while loc < len(self.code):
            matched = False
            value: str = ''
            for (tid, reg) in self.token_specification:
                mo = reg.match(self.code, loc)
                if mo:
                    # Remember the position of the token start; lineno and
                    # line_start may be advanced below for multi-line tokens.
                    curline = lineno
                    curline_start = line_start
                    col = mo.start() - line_start
                    matched = True
                    span_start = loc
                    loc = mo.end()
                    span_end = loc
                    bytespan = (span_start, span_end)
                    value = mo.group()
                    if tid == 'lparen':
                        par_count += 1
                    elif tid == 'rparen':
                        par_count -= 1
                    elif tid == 'lbracket':
                        bracket_count += 1
                    elif tid == 'rbracket':
                        bracket_count -= 1
                    elif tid == 'lcurl':
                        curl_count += 1
                    elif tid == 'rcurl':
                        curl_count -= 1
                    elif tid == 'dblquote':
                        raise ParseException('Double quotes are not supported. Use single quotes.', self.getline(line_start), lineno, col)
                    elif tid in {'string', 'fstring'}:
                        if '\n' in value:
                            msg = ("Newline character in a string detected, use ''' (three single quotes) "
                                   "for multiline strings instead.\n"
                                   "This will become a hard error in a future Meson release.")
                            mlog.warning(mlog.code_line(msg, self.getline(line_start), col), location=BaseNode(lineno, col, filename))
                        value = value[2 if tid == 'fstring' else 1:-1]
                    elif tid in {'multiline_string', 'multiline_fstring'}:
                        value = value[4 if tid == 'multiline_fstring' else 3:-3]
                        lines = value.split('\n')
                        if len(lines) > 1:
                            lineno += len(lines) - 1
                            # mo.end() is past the closing ''' which is not
                            # part of lines[-1], so it has to be subtracted
                            # too to land on the real start of the line.
                            line_start = mo.end() - len(lines[-1]) - 3
                    elif tid == 'eol_cont':
                        lineno += 1
                        line_start = loc
                        tid = 'whitespace'
                    elif tid == 'eol':
                        lineno += 1
                        line_start = loc
                        if par_count > 0 or bracket_count > 0 or curl_count > 0:
                            tid = 'whitespace'
                    elif tid == 'id':
                        if value in self.keywords:
                            tid = value
                        else:
                            if value in self.future_keywords:
                                mlog.warning(f"Identifier '{value}' will become a reserved keyword in a future release. Please rename it.",
                                             location=BaseNode(lineno, col, filename))
                    yield Token(tid, filename, curline_start, curline, col, bytespan, value)
                    break
            if not matched:
                raise ParseException('lexer', self.getline(line_start), lineno, col)


@dataclass
class BaseNode:
    """Base class of all AST nodes; stores the source location of the node."""

    lineno: int
    colno: int
    filename: str = field(hash=False)
    end_lineno: int = field(hash=False)
    end_colno: int = field(hash=False)
    whitespaces: T.Optional[WhitespaceNode] = field(hash=False)

    def __init__(self, lineno: int, colno: int, filename: str,
                 end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None:
        self.lineno = lineno
        self.colno = colno
        self.filename = filename
        # The end position defaults to the start position when not given.
        self.end_lineno = end_lineno if end_lineno is not None else lineno
        self.end_colno = end_colno if end_colno is not None else colno
        self.whitespaces = None

        # Attributes for the visitors
        self.level = 0
        self.ast_id = ''
        self.condition_level = 0

    def accept(self, visitor: 'AstVisitor') -> None:
        """Call ``visitor.visit_<ClassName>(self)`` if the visitor has such a callable."""
        fname = 'visit_{}'.format(type(self).__name__)
        if hasattr(visitor, fname):
            func = getattr(visitor, fname)
            if callable(func):
                func(self)

    def append_whitespaces(self, token: Token) -> None:
        """Attach a whitespace token to this node, creating the WhitespaceNode on first use."""
        if self.whitespaces is None:
            self.whitespaces = WhitespaceNode(token)
        else:
            self.whitespaces.append(token)


@dataclass(unsafe_hash=True)
class WhitespaceNode(BaseNode):
    """Accumulated text of consecutive whitespace tokens."""

    value: str

    def __init__(self, token: Token[str]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.value = ''
        self.append(token)

    def append(self, token: Token[str]) -> None:
        self.value += token.value


@dataclass(unsafe_hash=True)
class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):
    """Node built from a single token, keeping its value and byte span."""

    value: TV_TokenTypes
    bytespan: T.Tuple[int, int] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.value = token.value
        self.bytespan = token.bytespan


class BooleanNode(ElementaryNode[bool]):
    pass


class IdNode(ElementaryNode[str]):
    pass


@dataclass(unsafe_hash=True)
class NumberNode(ElementaryNode[int]):
    """Integer literal; ``raw_value`` keeps the text as written in the source."""

    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str]):
        BaseNode.__init__(self, token.lineno, token.colno, token.filename)
        self.raw_value = token.value
        # base=0 lets int() honour the 0b / 0o / 0x prefixes.
        self.value = int(token.value, base=0)
        self.bytespan = token.bytespan


class BaseStringNode(ElementaryNode[str]):
    pass


@dataclass(unsafe_hash=True)
class StringNode(BaseStringNode):
    """Single-line string; escape sequences are decoded unless ``escape`` is False."""

    raw_value: str = field(hash=False)

    def __init__(self, token: Token[str], escape: bool = True):
        super().__init__(token)
        self.value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, token.value) if escape else token.value
        self.raw_value = token.value


class FormatStringNode(StringNode):
    pass


@dataclass(unsafe_hash=True)
class MultilineStringNode(BaseStringNode):
    """Triple-quoted string; the token value is stored as-is."""

    def __init__(self, token: Token[str]):
        super().__init__(token)
        self.value = token.value


class MultilineFormatStringNode(MultilineStringNode):
    pass


class ContinueNode(ElementaryNode):
    pass


class BreakNode(ElementaryNode):
    pass


class SymbolNode(ElementaryNode[str]):
    pass


@dataclass(unsafe_hash=True)
class ArgumentNode(BaseNode):
    """Positional and keyword arguments of a call, array or dict literal."""

    arguments: T.List[BaseNode] = field(hash=False)
    commas: T.List[SymbolNode] = field(hash=False)
    columns: T.List[SymbolNode] = field(hash=False)
    kwargs: T.Dict[BaseNode, BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.arguments = []
        self.commas = []
        self.columns = []
        self.kwargs = {}
        self.order_error = False

    def prepend(self, statement: BaseNode) -> None:
        """Insert a positional argument at the front; EmptyNode is ignored."""
        if self.num_kwargs() > 0:
            self.order_error = True
        if not isinstance(statement, EmptyNode):
            self.arguments = [statement] + self.arguments

    def append(self, statement: BaseNode) -> None:
        """Add a positional argument at the end; EmptyNode is ignored.

        A positional argument added after a keyword argument sets ``order_error``.
        """
        if self.num_kwargs() > 0:
            self.order_error = True
        if not isinstance(statement, EmptyNode):
            self.arguments += [statement]

    def set_kwarg(self, name: IdNode, value: BaseNode) -> None:
        """Set a keyword argument, warning if the same name was already given."""
        if any((isinstance(x, IdNode) and name.value == x.value) for x in self.kwargs):
            mlog.warning(f'Keyword argument "{name.value}" defined multiple times.', location=self)
            mlog.warning('This will be an error in future Meson releases.')
        self.kwargs[name] = value

    def set_kwarg_no_check(self, name: BaseNode, value: BaseNode) -> None:
        self.kwargs[name] = value

    def num_args(self) -> int:
        return len(self.arguments)

    def num_kwargs(self) -> int:
        return len(self.kwargs)

    def incorrect_order(self) -> bool:
        return self.order_error

    def __len__(self) -> int:
        return self.num_args() # Fixme


@dataclass(unsafe_hash=True)
class ArrayNode(BaseNode):

    lbracket: SymbolNode
    args: ArgumentNode
    rbracket: SymbolNode

    def __init__(self, lbracket: SymbolNode, args: ArgumentNode, rbracket: SymbolNode):
        super().__init__(lbracket.lineno, lbracket.colno, args.filename, end_lineno=rbracket.lineno, end_colno=rbracket.colno+1)
        self.lbracket = lbracket
        self.args = args
        self.rbracket = rbracket


@dataclass(unsafe_hash=True)
class DictNode(BaseNode):

    lcurl: SymbolNode
    args: ArgumentNode
    rcurl: SymbolNode

    def __init__(self, lcurl: SymbolNode, args: ArgumentNode, rcurl: SymbolNode):
        super().__init__(lcurl.lineno, lcurl.colno, args.filename, end_lineno=rcurl.lineno, end_colno=rcurl.colno+1)
        self.lcurl = lcurl
        self.args = args
        self.rcurl = rcurl


class EmptyNode(BaseNode):
    pass


@dataclass(unsafe_hash=True)
class BinaryOperatorNode(BaseNode):
    """Base for nodes with a left operand, an operator symbol and a right operand."""

    left: BaseNode
    operator: SymbolNode
    right: BaseNode

    def __init__(self, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left.lineno, left.colno, left.filename)
        self.left = left
        self.operator = operator
        self.right = right


class OrNode(BinaryOperatorNode):
    pass


class AndNode(BinaryOperatorNode):
    pass


@dataclass(unsafe_hash=True)
class ComparisonNode(BinaryOperatorNode):

    ctype: COMPARISONS

    def __init__(self, ctype: COMPARISONS, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        self.ctype = ctype


@dataclass(unsafe_hash=True)
class ArithmeticNode(BinaryOperatorNode):

    # TODO: use a Literal for operation
    operation: str

    def __init__(self, operation: str, left: BaseNode, operator: SymbolNode, right: BaseNode):
        super().__init__(left, operator, right)
        self.operation = operation


@dataclass(unsafe_hash=True)
class UnaryOperatorNode(BaseNode):
    """Base for nodes with an operator symbol applied to a single operand."""

    operator: SymbolNode
    value: BaseNode

    def __init__(self, token: Token[TV_TokenTypes], operator: SymbolNode, value: BaseNode):
        super().__init__(token.lineno, token.colno, token.filename)
        self.operator = operator
        self.value = value


class NotNode(UnaryOperatorNode):
    pass


class UMinusNode(UnaryOperatorNode):
    pass


@dataclass(unsafe_hash=True)
class CodeBlockNode(BaseNode):
    """Sequence of statements (``lines``)."""

    pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
    lines: T.List[BaseNode] = field(hash=False)

    def __init__(self, token: Token[TV_TokenTypes]):
        super().__init__(token.lineno, token.colno, token.filename)
        self.pre_whitespaces = None
        self.lines = []

    def append_whitespaces(self, token: Token) -> None:
        """Attach whitespace to the last line, or to ``pre_whitespaces`` if the block is still empty."""
        if self.lines:
            self.lines[-1].append_whitespaces(token)
        elif self.pre_whitespaces is None:
            self.pre_whitespaces = WhitespaceNode(token)
        else:
            self.pre_whitespaces.append(token)


@dataclass(unsafe_hash=True)
class IndexNode(BaseNode):

    iobject: BaseNode
    lbracket: SymbolNode
    index: BaseNode
    rbracket: SymbolNode

    def __init__(self, iobject: BaseNode, lbracket: SymbolNode, index: BaseNode, rbracket: SymbolNode):
        super().__init__(iobject.lineno, iobject.colno, iobject.filename)
        self.iobject = iobject
        self.lbracket = lbracket
        self.index = index
        self.rbracket = rbracket


@dataclass(unsafe_hash=True)
class MethodNode(BaseNode):

    source_object: BaseNode
    dot: SymbolNode
    name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, source_object: BaseNode, dot: SymbolNode, name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        super().__init__(name.lineno, name.colno, name.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1)
        self.source_object = source_object
        self.dot = dot
        self.name = name
        self.lpar = lpar
        self.args = args
        self.rpar = rpar


@dataclass(unsafe_hash=True)
class FunctionNode(BaseNode):

    func_name: IdNode
    lpar: SymbolNode
    args: ArgumentNode
    rpar: SymbolNode

    def __init__(self, func_name: IdNode, lpar: SymbolNode, args: ArgumentNode, rpar: SymbolNode):
        super().__init__(func_name.lineno, func_name.colno, func_name.filename, end_lineno=rpar.end_lineno, end_colno=rpar.end_colno+1)
        self.func_name = func_name
        self.lpar = lpar
        self.args = args
        self.rpar = rpar


@dataclass(unsafe_hash=True)
class AssignmentNode(BaseNode):

    var_name: IdNode
    operator: SymbolNode
    value: BaseNode

    def __init__(self, var_name: IdNode, operator: SymbolNode, value: BaseNode):
        super().__init__(var_name.lineno, var_name.colno, var_name.filename)
        self.var_name = var_name
        self.operator = operator
        self.value = value


class PlusAssignmentNode(AssignmentNode):
    pass


@dataclass(unsafe_hash=True)
class ForeachClauseNode(BaseNode):

    foreach_: SymbolNode = field(hash=False)
    varnames: T.List[IdNode] = field(hash=False)
    commas: T.List[SymbolNode] = field(hash=False)
    column: SymbolNode = field(hash=False)
    items: BaseNode
    block: CodeBlockNode
    endforeach: SymbolNode = field(hash=False)

    def __init__(self, foreach_: SymbolNode, varnames: T.List[IdNode], commas: T.List[SymbolNode], column: SymbolNode, items: BaseNode, block: CodeBlockNode, endforeach: SymbolNode):
        super().__init__(foreach_.lineno, foreach_.colno, foreach_.filename)
        self.foreach_ = foreach_
        self.varnames = varnames
        self.commas = commas
        self.column = column
        self.items = items
        self.block = block
        self.endforeach = endforeach


@dataclass(unsafe_hash=True)
class IfNode(BaseNode):

    if_: SymbolNode
    condition: BaseNode
    block: CodeBlockNode

    def __init__(self, linenode: BaseNode, if_node: SymbolNode, condition: BaseNode, block: CodeBlockNode):
        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
        self.if_ = if_node
        self.condition = condition
        self.block = block


@dataclass(unsafe_hash=True)
class ElseNode(BaseNode):

    else_: SymbolNode
    block: CodeBlockNode

    def __init__(self, else_: SymbolNode, block: CodeBlockNode):
        super().__init__(block.lineno, block.colno, block.filename)
        self.else_ = else_
        self.block = block


@dataclass(unsafe_hash=True)
class IfClauseNode(BaseNode):
    """A complete if / elif / else / endif construct.

    ``ifs`` holds the ``if`` and every ``elif`` branch; ``elseblock`` is an
    EmptyNode until the parser assigns an ElseNode. ``endif`` is None until
    the parser assigns it.
    """

    ifs: T.List[IfNode] = field(hash=False)
    elseblock: T.Union[EmptyNode, ElseNode]
    endif: SymbolNode

    def __init__(self, linenode: BaseNode):
        super().__init__(linenode.lineno, linenode.colno, linenode.filename)
        self.ifs = []
        self.elseblock = EmptyNode(linenode.lineno, linenode.colno, linenode.filename)
        self.endif = None


@dataclass(unsafe_hash=True)
class TestCaseClauseNode(BaseNode):

    testcase: SymbolNode
    condition: BaseNode
    block: CodeBlockNode
    endtestcase: SymbolNode

    def __init__(self, testcase: SymbolNode, condition: BaseNode, block: CodeBlockNode, endtestcase: SymbolNode):
        super().__init__(condition.lineno, condition.colno, condition.filename)
        self.testcase = testcase
        self.condition = condition
        self.block = block
        self.endtestcase = endtestcase


@dataclass(unsafe_hash=True)
class TernaryNode(BaseNode):

    condition: BaseNode
    questionmark: SymbolNode
    trueblock: BaseNode
    column: SymbolNode
    falseblock: BaseNode

    def __init__(self, condition: BaseNode, questionmark: SymbolNode, trueblock: BaseNode, column: SymbolNode, falseblock: BaseNode):
        super().__init__(condition.lineno, condition.colno, condition.filename)
        self.condition = condition
        self.questionmark = questionmark
        self.trueblock = trueblock
        self.column = column
        self.falseblock = falseblock


@dataclass(unsafe_hash=True)
class ParenthesizedNode(BaseNode):

    lpar: SymbolNode = field(hash=False)
    inner: BaseNode
    rpar: SymbolNode = field(hash=False)

    def __init__(self, lpar: SymbolNode, inner: BaseNode, rpar: SymbolNode):
        super().__init__(lpar.lineno, lpar.colno, inner.filename, end_lineno=rpar.lineno, end_colno=rpar.colno+1)
        self.lpar = lpar
        self.inner = inner
        self.rpar = rpar


if T.TYPE_CHECKING:
    COMPARISONS = Literal['==', '!=', '<', '<=', '>=', '>', 'in', 'notin']

# Maps the token id of a comparison operator to the ctype stored in ComparisonNode.
comparison_map: T.Mapping[str, COMPARISONS] = {
    'equal': '==',
    'nequal': '!=',
    'lt': '<',
    'le': '<=',
    'gt': '>',
    'ge': '>=',
    'in': 'in',
    'not in': 'notin',
}

# Recursive descent parser for Meson's definition language.
# Very basic apart from the fact that we have many precedence
# levels so there are not enough words to describe them all.
# Enter numbering:
#
# 1 assignment
# 2 or
# 3 and
# 4 comparison
# 5 arithmetic
# 6 negation
# 7 funcall, method call
# 8 parentheses
# 9 plain token

class Parser:
    """Builds an AST from source code; ``parse()`` is the entry point."""

    def __init__(self, code: str, filename: str):
        self.lexer = Lexer(code)
        self.stream = self.lexer.lex(filename)
        self.current: Token = Token('eof', '', 0, 0, 0, (0, 0), None)
        self.previous = self.current
        # Whitespace/comment tokens seen since the last created node.
        self.current_ws: T.List[Token] = []

        self.getsym()
        self.in_ternary = False

    def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT:
        """Instantiate ``node_type`` and attach all pending whitespace tokens to it."""
        node = node_type(*args, **kwargs)
        for ws_token in self.current_ws:
            node.append_whitespaces(ws_token)
        self.current_ws = []
        return node

    def getsym(self) -> None:
        """Advance to the next significant token.

        Whitespace and comment tokens are collected in ``current_ws`` and
        skipped. An 'eol' token is collected too but also becomes the current
        token. When the stream is exhausted the current token becomes 'eof'.
        """
        self.previous = self.current
        try:
            self.current = next(self.stream)

            while self.current.tid in {'eol', 'comment', 'whitespace'}:
                self.current_ws.append(self.current)
                if self.current.tid == 'eol':
                    break
                self.current = next(self.stream)

        except StopIteration:
            self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)

    def getline(self) -> str:
        """Return the source line of the current token."""
        return self.lexer.getline(self.current.line_start)

    def accept(self, s: str) -> bool:
        """Consume the current token if its id is ``s``; return whether it was consumed."""
        if self.current.tid == s:
            self.getsym()
            return True
        return False

    def accept_any(self, tids: T.Tuple[str, ...]) -> str:
        """Consume the current token if its id is in ``tids``.

        Returns the consumed token id, or '' if nothing was consumed.
        """
        tid = self.current.tid
        if tid in tids:
            self.getsym()
            return tid
        return ''

    def expect(self, s: str) -> bool:
        """Like accept(), but raise ParseException if the token does not match."""
        if self.accept(s):
            return True
        raise ParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno)

    def block_expect(self, s: str, block_start: Token) -> bool:
        """Like expect(), but raise BlockParseException pointing at ``block_start`` too."""
        if self.accept(s):
            return True
        raise BlockParseException(f'Expecting {s} got {self.current.tid}.', self.getline(), self.current.lineno, self.current.colno, self.lexer.getline(block_start.line_start), block_start.lineno, block_start.colno)

    def parse(self) -> CodeBlockNode:
        """Parse the whole input and return the top-level code block.

        On failure to reach 'eof' the raised ParseException carries the
        partial block in its ``ast`` attribute.
        """
        block = self.codeblock()
        try:
            self.expect('eof')
        except ParseException as e:
            e.ast = block
            raise
        return block

    def statement(self) -> BaseNode:
        return self.e1()

    def e1(self) -> BaseNode:
        """Level 1: assignment, plus-assignment and the ternary operator."""
        left = self.e2()
        if self.accept('plusassign'):
            operator = self.create_node(SymbolNode, self.previous)
            value = self.e1()
            if not isinstance(left, IdNode):
                raise ParseException('Plusassignment target must be an id.', self.getline(), left.lineno, left.colno)
            assert isinstance(left.value, str)
            return self.create_node(PlusAssignmentNode, left, operator, value)
        elif self.accept('assign'):
            operator = self.create_node(SymbolNode, self.previous)
            value = self.e1()
            if not isinstance(left, IdNode):
                raise ParseException('Assignment target must be an id.',
                                     self.getline(), left.lineno, left.colno)
            assert isinstance(left.value, str)
            return self.create_node(AssignmentNode, left, operator, value)
        elif self.accept('questionmark'):
            if self.in_ternary:
                raise ParseException('Nested ternary operators are not allowed.',
                                     self.getline(), left.lineno, left.colno)

            qm_node = self.create_node(SymbolNode, self.previous)
            self.in_ternary = True
            trueblock = self.e1()
            self.expect('colon')
            column_node = self.create_node(SymbolNode, self.previous)
            falseblock = self.e1()
            self.in_ternary = False
            return self.create_node(TernaryNode, left, qm_node, trueblock, column_node, falseblock)
        return left

    def e2(self) -> BaseNode:
        """Level 2: ``or``."""
        left = self.e3()
        while self.accept('or'):
            operator = self.create_node(SymbolNode, self.previous)
            if isinstance(left, EmptyNode):
                raise ParseException('Invalid or clause.',
                                     self.getline(), left.lineno, left.colno)
            left = self.create_node(OrNode, left, operator, self.e3())
        return left

    def e3(self) -> BaseNode:
        """Level 3: ``and``."""
        left = self.e4()
        while self.accept('and'):
            operator = self.create_node(SymbolNode, self.previous)
            if isinstance(left, EmptyNode):
                raise ParseException('Invalid and clause.',
                                     self.getline(), left.lineno, left.colno)
            left = self.create_node(AndNode, left, operator, self.e4())
        return left

    def e4(self) -> BaseNode:
        """Level 4: comparisons, including ``in`` and ``not in``."""
        left = self.e5()
        for nodename, operator_type in comparison_map.items():
            if self.accept(nodename):
                operator = self.create_node(SymbolNode, self.previous)
                return self.create_node(ComparisonNode, operator_type, left, operator, self.e5())
        if self.accept('not'):
            ws = self.current_ws.copy()
            not_token = self.previous
            if self.accept('in'):
                in_token = self.previous
                self.current_ws = self.current_ws[len(ws):]  # remove whitespaces between not and in
                # Throwaway node used only to join the whitespace tokens
                # found between 'not' and 'in' into one string.
                temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename)
                for w in ws:
                    temp_node.append_whitespaces(w)

                # Merge 'not', the whitespace and 'in' into a single token.
                not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1])
                not_token.value += temp_node.whitespaces.value + in_token.value
                operator = self.create_node(SymbolNode, not_token)
                return self.create_node(ComparisonNode, 'notin', left, operator, self.e5())
        return left

    def e5(self) -> BaseNode:
        """Level 5: arithmetic."""
        return self.e5addsub()

    def e5addsub(self) -> BaseNode:
        op_map = {
            'plus': 'add',
            'dash': 'sub',
        }
        left = self.e5muldiv()
        while True:
            op = self.accept_any(tuple(op_map.keys()))
            if op:
                operator = self.create_node(SymbolNode, self.previous)
                left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e5muldiv())
            else:
                break
        return left

    def e5muldiv(self) -> BaseNode:
        op_map = {
            'percent': 'mod',
            'star': 'mul',
            'fslash': 'div',
        }
        left = self.e6()
        while True:
            op = self.accept_any(tuple(op_map.keys()))
            if op:
                operator = self.create_node(SymbolNode, self.previous)
                left = self.create_node(ArithmeticNode, op_map[op], left, operator, self.e6())
            else:
                break
        return left

    def e6(self) -> BaseNode:
        """Level 6: unary ``not`` and unary minus."""
        if self.accept('not'):
            operator = self.create_node(SymbolNode, self.previous)
            return self.create_node(NotNode, self.current, operator, self.e7())
        if self.accept('dash'):
            operator = self.create_node(SymbolNode, self.previous)
            return self.create_node(UMinusNode, self.current, operator, self.e7())
        return self.e7()

    def e7(self) -> BaseNode:
        """Level 7: function calls, followed by any chain of method calls and indexing."""
        left = self.e8()
        block_start = self.current
        if self.accept('lparen'):
            lpar = self.create_node(SymbolNode, block_start)
            args = self.args()
            self.block_expect('rparen', block_start)
            rpar = self.create_node(SymbolNode, self.previous)
            if not isinstance(left, IdNode):
                raise ParseException('Function call must be applied to plain id',
                                     self.getline(), left.lineno, left.colno)
            assert isinstance(left.value, str)
            left = self.create_node(FunctionNode, left, lpar, args, rpar)
        go_again = True
        while go_again:
            go_again = False
            if self.accept('dot'):
                go_again = True
                left = self.method_call(left)
            if self.accept('lbracket'):
                go_again = True
                left = self.index_call(left)
        return left

    def e8(self) -> BaseNode:
        """Level 8: parenthesized expressions, array literals and dict literals."""
        block_start = self.current
        if self.accept('lparen'):
            lpar = self.create_node(SymbolNode, block_start)
            e = self.statement()
            self.block_expect('rparen', block_start)
            rpar = self.create_node(SymbolNode, self.previous)
            return ParenthesizedNode(lpar, e, rpar)
        elif self.accept('lbracket'):
            lbracket = self.create_node(SymbolNode, block_start)
            args = self.args()
            self.block_expect('rbracket', block_start)
            rbracket = self.create_node(SymbolNode, self.previous)
            return self.create_node(ArrayNode, lbracket, args, rbracket)
        elif self.accept('lcurl'):
            lcurl = self.create_node(SymbolNode, block_start)
            key_values = self.key_values()
            self.block_expect('rcurl', block_start)
            rcurl = self.create_node(SymbolNode, self.previous)
            return self.create_node(DictNode, lcurl, key_values, rcurl)
        else:
            return self.e9()

    def e9(self) -> BaseNode:
        """Level 9: plain tokens. Returns an EmptyNode if no plain token is present."""
        t = self.current
        if self.accept('true'):
            t.value = True
            return self.create_node(BooleanNode, t)
        if self.accept('false'):
            t.value = False
            return self.create_node(BooleanNode, t)
        if self.accept('id'):
            return self.create_node(IdNode, t)
        if self.accept('number'):
            return self.create_node(NumberNode, t)
        if self.accept('string'):
            return self.create_node(StringNode, t)
        if self.accept('fstring'):
            return self.create_node(FormatStringNode, t)
        if self.accept('multiline_string'):
            return self.create_node(MultilineStringNode, t)
        if self.accept('multiline_fstring'):
            return self.create_node(MultilineFormatStringNode, t)
        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)

    def key_values(self) -> ArgumentNode:
        """Parse the ``key: value`` pairs of a dict literal.

        Raises ParseException if an entry is not followed by a colon.
        """
        s = self.statement()
        a = self.create_node(ArgumentNode, self.current)

        while not isinstance(s, EmptyNode):
            if self.accept('colon'):
                a.columns.append(self.create_node(SymbolNode, self.previous))
                a.set_kwarg_no_check(s, self.statement())
                if not self.accept('comma'):
                    return a
                a.commas.append(self.create_node(SymbolNode, self.previous))
            else:
                raise ParseException('Only key:value pairs are valid in dict construction.',
                                     self.getline(), s.lineno, s.colno)
            s = self.statement()
        return a

    def args(self) -> ArgumentNode:
        """Parse a comma separated list of positional and ``name: value`` arguments.

        Raises ParseException if a keyword argument name is not a plain id.
        """
        s = self.statement()
        a = self.create_node(ArgumentNode, self.current)

        while not isinstance(s, EmptyNode):
            if self.accept('comma'):
                a.commas.append(self.create_node(SymbolNode, self.previous))
                a.append(s)
            elif self.accept('colon'):
                a.columns.append(self.create_node(SymbolNode, self.previous))
                if not isinstance(s, IdNode):
                    raise ParseException('Dictionary key must be a plain identifier.',
                                         self.getline(), s.lineno, s.colno)
                a.set_kwarg(s, self.statement())
                if not self.accept('comma'):
                    return a
                a.commas.append(self.create_node(SymbolNode, self.previous))
            else:
                a.append(s)
                return a
            s = self.statement()
        return a

    def method_call(self, source_object: BaseNode) -> MethodNode:
        """Parse ``name(args)`` after a consumed dot, including chained method calls."""
        dot = self.create_node(SymbolNode, self.previous)
        methodname = self.e9()
        if not isinstance(methodname, IdNode):
            # number '.' number looks like a float literal, which gets its own message.
            if isinstance(source_object, NumberNode) and isinstance(methodname, NumberNode):
                raise ParseException('meson does not support float numbers',
                                     self.getline(), source_object.lineno, source_object.colno)
            raise ParseException('Method name must be plain id',
                                 self.getline(), self.current.lineno, self.current.colno)
        assert isinstance(methodname.value, str)
        self.expect('lparen')
        lpar = self.create_node(SymbolNode, self.previous)
        args = self.args()
        rpar = self.create_node(SymbolNode, self.current)
        self.expect('rparen')
        method = self.create_node(MethodNode, source_object, dot, methodname, lpar, args, rpar)
        if self.accept('dot'):
            return self.method_call(method)
        return method

    def index_call(self, source_object: BaseNode) -> IndexNode:
        """Parse ``index]`` after a consumed opening bracket."""
        lbracket = self.create_node(SymbolNode, self.previous)
        index_statement = self.statement()
        self.expect('rbracket')
        rbracket = self.create_node(SymbolNode, self.previous)
        return self.create_node(IndexNode, source_object, lbracket, index_statement, rbracket)

    def foreachblock(self) -> ForeachClauseNode:
        """Parse a foreach clause after the consumed ``foreach`` keyword.

        The ``endforeach`` token itself is not consumed here.
        """
        foreach_ = self.create_node(SymbolNode, self.previous)
        self.expect('id')
        assert isinstance(self.previous.value, str)
        varnames = [self.create_node(IdNode, self.previous)]
        commas = []

        if self.accept('comma'):
            commas.append(self.create_node(SymbolNode, self.previous))
            self.expect('id')
            assert isinstance(self.previous.value, str)
            varnames.append(self.create_node(IdNode, self.previous))

        self.expect('colon')
        column = self.create_node(SymbolNode, self.previous)
        items = self.statement()
        block = self.codeblock()
        endforeach = self.create_node(SymbolNode, self.current)
        return self.create_node(ForeachClauseNode, foreach_, varnames, commas, column, items, block, endforeach)

    def ifblock(self) -> IfClauseNode:
        """Parse an if clause after the consumed ``if`` keyword.

        The ``endif`` token itself is not consumed here.
        """
        if_node = self.create_node(SymbolNode, self.previous)
        condition = self.statement()
        clause = self.create_node(IfClauseNode, condition)
        self.expect('eol')
        block = self.codeblock()
        clause.ifs.append(self.create_node(IfNode, clause, if_node, condition, block))
        self.elseifblock(clause)
        clause.elseblock = self.elseblock()
        clause.endif = self.create_node(SymbolNode, self.current)
        return clause

    def elseifblock(self, clause: IfClauseNode) -> None:
        """Parse any number of ``elif`` branches and append them to ``clause.ifs``."""
        while self.accept('elif'):
            elif_ = self.create_node(SymbolNode, self.previous)
            s = self.statement()
            self.expect('eol')
            b = self.codeblock()
            clause.ifs.append(self.create_node(IfNode, s, elif_, s, b))

    def elseblock(self) -> T.Union[ElseNode, EmptyNode]:
        """Parse an optional ``else`` branch; return an EmptyNode if there is none."""
        if self.accept('else'):
            else_ = self.create_node(SymbolNode, self.previous)
            self.expect('eol')
            block = self.codeblock()
            return ElseNode(else_, block)
        return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)

    def testcaseblock(self) -> TestCaseClauseNode:
        """Parse a testcase clause after the consumed ``testcase`` keyword.

        The ``endtestcase`` token itself is not consumed here.
        """
        testcase = self.create_node(SymbolNode, self.previous)
        condition = self.statement()
        self.expect('eol')
        block = self.codeblock()
        endtestcase = SymbolNode(self.current)
        return self.create_node(TestCaseClauseNode, testcase, condition, block, endtestcase)

    def line(self) -> BaseNode:
        """Parse one line: a block construct, continue/break, or a plain statement.

        Returns an EmptyNode for an empty line.
        """
        block_start = self.current
        if self.current == 'eol':
            return EmptyNode(self.current.lineno, self.current.colno, self.current.filename)
        if self.accept('if'):
            ifblock = self.ifblock()
            self.block_expect('endif', block_start)
            return ifblock
        if self.accept('foreach'):
            forblock = self.foreachblock()
            self.block_expect('endforeach', block_start)
            return forblock
        if self.accept('continue'):
            return self.create_node(ContinueNode, self.current)
        if self.accept('break'):
            return self.create_node(BreakNode, self.current)
        if self.lexer.in_unit_test and self.accept('testcase'):
            block = self.testcaseblock()
            self.block_expect('endtestcase', block_start)
            return block
        return self.statement()

    def codeblock(self) -> CodeBlockNode:
        """Parse consecutive lines into a CodeBlockNode.

        Parsing stops at the first line that is not followed by 'eol'. If a
        ParseException occurs, the partial block is stored in its ``ast``
        attribute before it is re-raised.
        """
        block = self.create_node(CodeBlockNode, self.current)
        cond = True

        try:
            while cond:
                for ws_token in self.current_ws:
                    block.append_whitespaces(ws_token)
                self.current_ws = []

                curline = self.line()

                if not isinstance(curline, EmptyNode):
                    block.lines.append(curline)

                cond = self.accept('eol')

        except ParseException as e:
            e.ast = block
            raise

        # Remaining whitespaces will not be caught since there are no more nodes
        for ws_token in self.current_ws:
            block.append_whitespaces(ws_token)
        self.current_ws = []

        return block