diff options
author | Tim 'mithro' Ansell <me@mith.ro> | 2018-03-03 15:00:55 -0800 |
---|---|---|
committer | Nirbheek Chauhan <nirbheek.chauhan@gmail.com> | 2018-04-17 09:55:34 +0000 |
commit | 36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8 (patch) | |
tree | 314bc7cc0306163447aa53022f19943812d40683 /mesonbuild/mparser.py | |
parent | 6089631a1b9cd5b9b4e75598721b53b9abc33950 (diff) | |
download | meson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.zip meson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.tar.gz meson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.tar.bz2 |
Complete python escape sequences aware strings
Fixes #3169
Diffstat (limited to 'mesonbuild/mparser.py')
-rw-r--r-- | mesonbuild/mparser.py | 34 |
1 files changed, 30 insertions, 4 deletions
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index 0e7524c..bf7c271 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -13,9 +13,36 @@ # limitations under the License. import re +import codecs from .mesonlib import MesonException from . import mlog +# This is the regex for the supported escape sequences of a regular string +# literal, like 'abc\x00' +ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +# This is the regex for the supported escape sequences of a multiline string +# literal, like '''abc\x00'''. The only difference is that single quote (') +# doesn't require escaping. +ESCAPE_SEQUENCE_MULTI_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +def decode_match(match): + return codecs.decode(match.group(0), 'unicode_escape') + class ParseException(MesonException): def __init__(self, text, line, lineno, colno): # Format as error message, followed by the line with the error, followed by a caret to show the error column. @@ -112,7 +139,6 @@ class Lexer: par_count = 0 bracket_count = 0 col = 0 - newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n') while loc < len(self.code): matched = False value = None @@ -145,12 +171,12 @@ class Lexer: if match_text.find("\n") != -1: mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead. This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col) - value = match_text[1:-1].replace(r"\'", "'") - value = newline_rx.sub(r'\1\n', value) - value = value.replace(r" \\ ".strip(), r" \ ".strip()) + value = match_text[1:-1] + value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value) elif tid == 'multiline_string': tid = 'string' value = match_text[3:-3] + value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value) lines = match_text.split('\n') if len(lines) > 1: lineno += len(lines) - 1 |