aboutsummaryrefslogtreecommitdiff
path: root/mesonbuild/mparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'mesonbuild/mparser.py')
-rw-r--r--mesonbuild/mparser.py34
1 files changed, 30 insertions, 4 deletions
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 0e7524c..bf7c271 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -13,9 +13,36 @@
# limitations under the License.
import re
+import codecs
from .mesonlib import MesonException
from . import mlog
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+# This is the regex for the supported escape sequences of a multiline string
+# literal, like '''abc\x00'''. The only difference is that single quote (')
+# doesn't require escaping.
+ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode_escape')
+
class ParseException(MesonException):
def __init__(self, text, line, lineno, colno):
# Format as error message, followed by the line with the error, followed by a caret to show the error column.
@@ -112,7 +139,6 @@ class Lexer:
par_count = 0
bracket_count = 0
col = 0
- newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
while loc < len(self.code):
matched = False
value = None
@@ -145,12 +171,12 @@ class Lexer:
if match_text.find("\n") != -1:
mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
- value = match_text[1:-1].replace(r"\'", "'")
- value = newline_rx.sub(r'\1\n', value)
- value = value.replace(r" \\ ".strip(), r" \ ".strip())
+ value = match_text[1:-1]
+ value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
elif tid == 'multiline_string':
tid = 'string'
value = match_text[3:-3]
+ value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
lines = match_text.split('\n')
if len(lines) > 1:
lineno += len(lines) - 1