author    Jussi Pakkanen <jpakkane@gmail.com>  2014-03-12 19:40:39 +0200
committer Jussi Pakkanen <jpakkane@gmail.com>  2014-03-12 19:40:39 +0200
commit    fc42ae04507d6723d0331a9f9b71d3328ec56ce4 (patch)
tree      aff0151269b6c23edc64cf3fb77b10f5856fc936
parent    c7865cd98f0a420845cb50cf77e1f0c8f266475f (diff)
Some experiments with a self-written parser.
-rwxr-xr-x  parsertest.py  106
1 file changed, 106 insertions, 0 deletions
diff --git a/parsertest.py b/parsertest.py
new file mode 100755
index 0000000..704c188
--- /dev/null
+++ b/parsertest.py
@@ -0,0 +1,106 @@
+#!/usr/bin/python3
+
+# Copyright 2014 Jussi Pakkanen
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+
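+# Raised when no token pattern matches at the current source position.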
+class ParseException(Exception):
+ def __init__(self, lineno, colno):
+ super().__init__()
+ self.lineno = lineno
+ self.colno = colno
+
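+# A single lexed token: its type id and the line/column where it starts.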
+class Token:
+ def __init__(self, id, lineno, colno):
+ self.id = id
+ self.lineno = lineno
+ self.colno = colno
+
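+# Hand-written regex-based lexer that turns source text into a stream of Tokens.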
+class Lexer:
+ def __init__(self):
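+        # Reserved words; the generic 'id' pattern below also matches these.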
+ self.keywords = {'true', 'false', 'if', 'else', 'elif',
+ 'endif', 'and', 'or', 'not'}
+ self.token_specification = [
+ # Need to be sorted longest to shortest.
+ ('ignore', re.compile(r'[ \t]')),
+ ('id', re.compile('[_a-zA-Z][_0-9a-zA-Z]*')),
+ ('number', re.compile(r'\d+')),
+ ('eol_cont', re.compile(r'\\\n')),
+ ('eol', re.compile(r'\n')),
+ ('multiline_string', re.compile(r"'''(.|\n)*?'''", re.M)),
+ ('comment', re.compile(r'\#.*')),
+ ('lparen', re.compile(r'\(')),
+ ('rparen', re.compile(r'\)')),
+ ('lbracket', re.compile(r'\[')),
+            ('rbracket', re.compile(r'\]')),
+ ('string', re.compile("'[^']*?'")),
+ ('comma', re.compile(r',')),
+ ('dot', re.compile(r'\.')),
+            ('colon', re.compile(r':')),
+            ('equal', re.compile(r'==')),
+            ('assign', re.compile(r'=')),
+            ('nequals', re.compile(r'!=')),
+ ]
+
+ def lex(self, code):
+ lineno = 1
+ line_start = 0
+        loc = 0
+ par_count = 0
+ bracket_count = 0
+ col = 0
+        while loc < len(code):
+ matched = False
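+            # Try each token pattern at the current offset; the first match wins.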
+ for (tid, reg) in self.token_specification:
+ mo = reg.match(code, loc)
+ if mo:
+ curline = lineno
+ col = mo.start()-line_start
+ matched = True
+ loc = mo.end()
+ match_text = mo.group()
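+                    # Update nesting and line bookkeeping depending on the token type.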
+ if tid == 'ignore':
+ break
+ elif tid == 'lparen':
+ par_count += 1
+ elif tid == 'rparen':
+ par_count -= 1
+ elif tid == 'lbracket':
+ bracket_count += 1
+ elif tid == 'rbracket':
+ bracket_count -= 1
+ elif tid == 'multiline_string':
+ lines = match_text.split('\n')
+ if len(lines) > 1:
+ lineno += len(lines) - 1
+ line_start = mo.end() - len(lines[-1])
+ elif tid == 'eol' or tid == 'eol_cont':
+ lineno += 1
+ line_start = loc
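+                        # Newlines inside parentheses or brackets do not end a
+                        # statement, so no token is emitted for them.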
+ if par_count > 0 or bracket_count > 0:
+ break
+                    yield Token(tid, curline, col)
+                    # Restart pattern matching from the top of the list for the next token.
+                    break
+            if not matched:
+                raise ParseException(lineno, loc - line_start)
+
+if __name__ == '__main__':
+    with open(sys.argv[1]) as f:
+        code = f.read()
+ lex = Lexer()
+ try:
+ for i in lex.lex(code):
+ print('Token:', i.id, 'Line:', i.lineno, 'Column:', i.colno)
+ except ParseException as e:
+        print('Error line', e.lineno, 'column', e.colno)
\ No newline at end of file