aboutsummaryrefslogtreecommitdiff
path: root/mesonbuild/mparser.py
diff options
context:
space:
mode:
authorTim 'mithro' Ansell <me@mith.ro>2018-03-03 15:00:55 -0800
committerNirbheek Chauhan <nirbheek.chauhan@gmail.com>2018-04-17 09:55:34 +0000
commit36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8 (patch)
tree314bc7cc0306163447aa53022f19943812d40683 /mesonbuild/mparser.py
parent6089631a1b9cd5b9b4e75598721b53b9abc33950 (diff)
downloadmeson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.zip
meson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.tar.gz
meson-36aab0f4b2a05a1ae4c8603e57b4c3c684fe8ea8.tar.bz2
Complete python escape sequences aware strings
Fixes #3169
Diffstat (limited to 'mesonbuild/mparser.py')
-rw-r--r--mesonbuild/mparser.py34
1 files changed, 30 insertions, 4 deletions
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 0e7524c..bf7c271 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -13,9 +13,36 @@
# limitations under the License.
import re
+import codecs
from .mesonlib import MesonException
from . import mlog
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+# This is the regex for the supported escape sequences of a multiline string
+# literal, like '''abc\x00'''. The only difference is that single quote (')
+# doesn't require escaping.
+ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode_escape')
+
class ParseException(MesonException):
def __init__(self, text, line, lineno, colno):
# Format as error message, followed by the line with the error, followed by a caret to show the error column.
@@ -112,7 +139,6 @@ class Lexer:
par_count = 0
bracket_count = 0
col = 0
- newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
while loc < len(self.code):
matched = False
value = None
@@ -145,12 +171,12 @@ class Lexer:
if match_text.find("\n") != -1:
mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
- value = match_text[1:-1].replace(r"\'", "'")
- value = newline_rx.sub(r'\1\n', value)
- value = value.replace(r" \\ ".strip(), r" \ ".strip())
+ value = match_text[1:-1]
+ value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
elif tid == 'multiline_string':
tid = 'string'
value = match_text[3:-3]
+ value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
lines = match_text.split('\n')
if len(lines) > 1:
lineno += len(lines) - 1