author     Charles Brunet <charles.brunet@optelgroup.com>   2023-08-25 10:08:24 -0400
committer  Charles Brunet <charles.brunet@optelgroup.com>   2023-09-11 07:51:19 -0400
commit     11ef2a536c6c9fec0b048da74b36a0231ef2199a (patch)
tree       07706ffdaec1ea32e9b3c76a544f80eb51229dae /mesonbuild
parent     5b29eff8ad02348ff0495cd307b29259567e11df (diff)
parser: preserve whitespaces and comments
Diffstat (limited to 'mesonbuild')
-rw-r--r--   mesonbuild/mparser.py   67
1 file changed, 60 insertions, 7 deletions
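
The core of the change: the lexer no longer drops whitespace and comment tokens, and the parser attaches them to AST nodes through a new WhitespaceNode. Below is a minimal sketch of that attachment logic, using a simplified stand-in Token with only the fields needed here (the real classes live in mesonbuild/mparser.py, shown in the diff that follows):

    # Sketch of the whitespace-attachment behaviour introduced by this commit.
    # Token is a simplified stand-in, not the real mparser.Token.
    from dataclasses import dataclass
    import typing as T

    @dataclass
    class Token:
        tid: str
        value: str
        lineno: int = 1
        colno: int = 1
        filename: str = 'meson.build'

    class WhitespaceNode:
        def __init__(self, token: Token) -> None:
            self.value = ''
            self.append(token)

        def append(self, token: Token) -> None:
            # Consecutive whitespace/comment tokens are concatenated into one node.
            self.value += token.value

    class BaseNode:
        def __init__(self) -> None:
            self.whitespaces: T.Optional[WhitespaceNode] = None

        def append_whitespaces(self, token: Token) -> None:
            if self.whitespaces is None:
                self.whitespaces = WhitespaceNode(token)
            else:
                self.whitespaces.append(token)

    node = BaseNode()
    node.append_whitespaces(Token('whitespace', '  '))
    node.append_whitespaces(Token('comment', '# keep me'))
    print(repr(node.whitespaces.value))  # '  # keep me'
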
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 75a12aa..a161842 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -116,7 +116,7 @@ class Lexer:
self.keywords.update({'testcase', 'endtestcase'})
self.token_specification = [
# Need to be sorted longest to shortest.
- ('ignore', re.compile(r'[ \t]')),
+ ('whitespace', re.compile(r'[ \t]+')),
('multiline_fstring', re.compile(r"f'''(.|\n)*?'''", re.M)),
('fstring', re.compile(r"f'([^'\\]|(\\.))*'")),
('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
@@ -178,9 +178,7 @@ class Lexer:
span_end = loc
bytespan = (span_start, span_end)
value = mo.group()
- if tid in {'ignore', 'comment'}:
- break
- elif tid == 'lparen':
+ if tid == 'lparen':
par_count += 1
elif tid == 'rparen':
par_count -= 1
@@ -210,12 +208,12 @@ class Lexer:
elif tid == 'eol_cont':
lineno += 1
line_start = loc
- break
+ tid = 'whitespace'
elif tid == 'eol':
lineno += 1
line_start = loc
if par_count > 0 or bracket_count > 0 or curl_count > 0:
- break
+ tid = 'whitespace'
elif tid == 'id':
if value in self.keywords:
tid = value
@@ -235,6 +233,7 @@ class BaseNode:
filename: str = field(hash=False)
end_lineno: int = field(hash=False)
end_colno: int = field(hash=False)
+ whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
def __init__(self, lineno: int, colno: int, filename: str,
end_lineno: T.Optional[int] = None, end_colno: T.Optional[int] = None) -> None:
@@ -257,6 +256,26 @@ class BaseNode:
if callable(func):
func(self)
+ def append_whitespaces(self, token: Token) -> None:
+ if self.whitespaces is None:
+ self.whitespaces = WhitespaceNode(token)
+ else:
+ self.whitespaces.append(token)
+
+
+@dataclass(unsafe_hash=True)
+class WhitespaceNode(BaseNode):
+
+ value: str
+
+ def __init__(self, token: Token[str]):
+ super().__init__(token.lineno, token.colno, token.filename)
+ self.value = ''
+ self.append(token)
+
+ def append(self, token: Token[str]) -> None:
+ self.value += token.value
+
@dataclass(unsafe_hash=True)
class ElementaryNode(T.Generic[TV_TokenTypes], BaseNode):
@@ -456,6 +475,7 @@ class UMinusNode(UnaryOperatorNode):
@dataclass(unsafe_hash=True)
class CodeBlockNode(BaseNode):
+ pre_whitespaces: T.Optional[WhitespaceNode] = field(hash=False)
lines: T.List[BaseNode] = field(hash=False)
def __init__(self, token: Token[TV_TokenTypes]):
@@ -463,6 +483,14 @@ class CodeBlockNode(BaseNode):
self.pre_whitespaces = None
self.lines = []
+ def append_whitespaces(self, token: Token) -> None:
+ if self.lines:
+ self.lines[-1].append_whitespaces(token)
+ elif self.pre_whitespaces is None:
+ self.pre_whitespaces = WhitespaceNode(token)
+ else:
+ self.pre_whitespaces.append(token)
+
@dataclass(unsafe_hash=True)
class IndexNode(BaseNode):
@@ -669,12 +697,16 @@ class Parser:
self.stream = self.lexer.lex(filename)
self.current: Token = Token('eof', '', 0, 0, 0, (0, 0), None)
self.previous = self.current
+ self.current_ws: T.List[Token] = []
self.getsym()
self.in_ternary = False
def create_node(self, node_type: T.Type[BaseNodeT], *args: T.Any, **kwargs: T.Any) -> BaseNodeT:
node = node_type(*args, **kwargs)
+ for ws_token in self.current_ws:
+ node.append_whitespaces(ws_token)
+ self.current_ws = []
return node
def getsym(self) -> None:
@@ -682,6 +714,12 @@ class Parser:
try:
self.current = next(self.stream)
+ while self.current.tid in {'eol', 'comment', 'whitespace'}:
+ self.current_ws.append(self.current)
+ if self.current.tid == 'eol':
+ break
+ self.current = next(self.stream)
+
except StopIteration:
self.current = Token('eof', '', self.current.line_start, self.current.lineno, self.current.colno + self.current.bytespan[1] - self.current.bytespan[0], (0, 0), None)
@@ -782,11 +820,17 @@ class Parser:
operator = self.create_node(SymbolNode, self.previous)
return self.create_node(ComparisonNode, operator_type, left, operator, self.e5())
if self.accept('not'):
+ ws = self.current_ws.copy()
not_token = self.previous
if self.accept('in'):
in_token = self.previous
+ self.current_ws = self.current_ws[len(ws):] # remove whitespaces between not and in
+ temp_node = EmptyNode(in_token.lineno, in_token.colno, in_token.filename)
+ for w in ws:
+ temp_node.append_whitespaces(w)
+
not_token.bytespan = (not_token.bytespan[0], in_token.bytespan[1])
- not_token.value += in_token.value
+ not_token.value += temp_node.whitespaces.value + in_token.value
operator = self.create_node(SymbolNode, not_token)
return self.create_node(ComparisonNode, 'notin', left, operator, self.e5())
return left
@@ -1054,6 +1098,10 @@ class Parser:
try:
while cond:
+ for ws_token in self.current_ws:
+ block.append_whitespaces(ws_token)
+ self.current_ws = []
+
curline = self.line()
if not isinstance(curline, EmptyNode):
@@ -1065,4 +1113,9 @@ class Parser:
e.ast = block
raise
+ # Remaining whitespaces will not be caught since there are no more nodes
+ for ws_token in self.current_ws:
+ block.append_whitespaces(ws_token)
+ self.current_ws = []
+
return block
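
A hedged usage sketch of the net effect, assuming the Parser(code, filename) constructor and parse() method of mesonbuild/mparser.py at this revision; where exactly the trailing comment ends up attached is not verified here, so the sketch only inspects the new attributes rather than asserting a value:

    # Usage sketch: after this commit, whitespace and comments should survive
    # parsing and be reachable from the AST via the new `whitespaces` /
    # `pre_whitespaces` attributes instead of being discarded by the lexer.
    from mesonbuild.mparser import Parser

    code = "project('demo')  # trailing comment\n"
    ast = Parser(code, 'meson.build').parse()
    first = ast.lines[0]
    print(repr(getattr(first, 'whitespaces', None)))
    print(repr(getattr(ast, 'pre_whitespaces', None)))
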