aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/markdown/Syntax.md20
-rw-r--r--mesonbuild/mparser.py34
-rw-r--r--test cases/common/190 escape and unicode/file.c.in5
-rw-r--r--test cases/common/190 escape and unicode/file.py10
-rw-r--r--test cases/common/190 escape and unicode/find.py9
-rw-r--r--test cases/common/190 escape and unicode/fun.c3
-rw-r--r--test cases/common/190 escape and unicode/main.c12
-rw-r--r--test cases/common/190 escape and unicode/meson.build25
-rw-r--r--test cases/common/33 try compile/meson.build4
-rw-r--r--test cases/common/39 tryrun/meson.build4
-rw-r--r--test cases/common/42 string operations/meson.build18
11 files changed, 125 insertions, 19 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md
index 1005100..01c8c6e 100644
--- a/docs/markdown/Syntax.md
+++ b/docs/markdown/Syntax.md
@@ -90,8 +90,24 @@ single quote do it like this:
single quote = 'contains a \' character'
```
-Similarly `\n` gets converted to a newline and `\\` to a single
-backslash.
+The full list of escape sequences is:
+
+* `\\` Backslash
+* `\'` Single quote
+* `\a` Bell
+* `\b` Backspace
+* `\f` Formfeed
+* `\n` Newline
+* `\r` Carriage Return
+* `\t` Horizontal Tab
+* `\v` Vertical Tab
+* `\ooo` Character with octal value ooo
+* `\xhh` Character with hex value hh
+* `\uxxxx` Character with 16-bit hex value xxxx
+* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx
+* `\N{name}` Character named name in Unicode database
+
+As in Python and C, up to three octal digits are accepted in `\ooo`.
#### String concatenation
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 0e7524c..bf7c271 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -13,9 +13,36 @@
# limitations under the License.
import re
+import codecs
from .mesonlib import MesonException
from . import mlog
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+# This is the regex for the supported escape sequences of a multiline string
+# literal, like '''abc\x00'''. It differs from the single-line regex only in
+# that \' is not an escape sequence: a single quote (') needs no escaping here.
+ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode_escape')
+
class ParseException(MesonException):
def __init__(self, text, line, lineno, colno):
# Format as error message, followed by the line with the error, followed by a caret to show the error column.
@@ -112,7 +139,6 @@ class Lexer:
par_count = 0
bracket_count = 0
col = 0
- newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
while loc < len(self.code):
matched = False
value = None
@@ -145,12 +171,12 @@ class Lexer:
if match_text.find("\n") != -1:
mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
- value = match_text[1:-1].replace(r"\'", "'")
- value = newline_rx.sub(r'\1\n', value)
- value = value.replace(r" \\ ".strip(), r" \ ".strip())
+ value = match_text[1:-1]
+ value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
elif tid == 'multiline_string':
tid = 'string'
value = match_text[3:-3]
+ value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
lines = match_text.split('\n')
if len(lines) > 1:
lineno += len(lines) - 1
diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in
new file mode 100644
index 0000000..413ed42
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.c.in
@@ -0,0 +1,5 @@
+#include<stdio.h>
+const char* does_it_work() {
+ printf("{NAME}\n");
+ return "yes it does";
+}
diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py
new file mode 100644
index 0000000..af67a09
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+
+with open(sys.argv[1]) as fh:
+ content = fh.read().replace("{NAME}", sys.argv[2])
+
+with open(os.path.join(sys.argv[3]), 'w') as fh:
+ fh.write(content)
diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py
new file mode 100644
index 0000000..34a3eb8
--- /dev/null
+++ b/test cases/common/190 escape and unicode/find.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+for fh in os.listdir('.'):
+ if os.path.isfile(fh):
+ if fh.endswith('.c'):
+ sys.stdout.write(fh + '\0')
diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 escape and unicode/fun.c
new file mode 100644
index 0000000..8eeb8ea
--- /dev/null
+++ b/test cases/common/190 escape and unicode/fun.c
@@ -0,0 +1,3 @@
+int a_fun() {
+ return 1;
+}
diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c
new file mode 100644
index 0000000..0bcde16
--- /dev/null
+++ b/test cases/common/190 escape and unicode/main.c
@@ -0,0 +1,12 @@
+#include <string.h>
+
+const char* does_it_work();
+
+int a_fun();
+
+int main() {
+ if(strcmp(does_it_work(), "yes it does") != 0) {
+ return -a_fun();
+ }
+ return 0;
+}
diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build
new file mode 100644
index 0000000..be11073
--- /dev/null
+++ b/test cases/common/190 escape and unicode/meson.build
@@ -0,0 +1,25 @@
+project('180 escape', 'c')
+
+gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@')
+
+gen_file = gen.process('file.c.in')
+
+find_file_list = run_command(find_program('find.py'))
+assert(find_file_list.returncode() == 0, 'Didn\'t find any files.')
+
+# Strings should support both octal \ooo and hex \xhh encodings
+
+found_files_oct = []
+foreach l : find_file_list.stdout().strip('\0').split('\000')
+ found_files_oct += [files(l)]
+endforeach
+
+test('first', executable('first', found_files_oct + [gen_file]))
+
+found_files_hex = []
+foreach l : find_file_list.stdout().strip('\x00').split('\x00')
+ found_files_hex += [files(l)]
+endforeach
+
+test('second', executable('second', found_files_hex + [gen_file]))
+
diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build
index 09ca395..cb1037d 100644
--- a/test cases/common/33 try compile/meson.build
+++ b/test cases/common/33 try compile/meson.build
@@ -1,11 +1,11 @@
project('try compile', 'c', 'cpp')
code = '''#include<stdio.h>
-void func() { printf("Something.\n"); }
+void func() { printf("Something.\\n"); }
'''
breakcode = '''#include<nonexisting.h>
-void func() { printf("This won't work.\n"); }
+void func() { printf("This won't work.\\n"); }
'''
foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')]
diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build
index c64446f..daf5be7 100644
--- a/test cases/common/39 tryrun/meson.build
+++ b/test cases/common/39 tryrun/meson.build
@@ -13,8 +13,8 @@ endif
ok_code = '''#include<stdio.h>
int main(int argc, char **argv) {
- printf("%s\n", "stdout");
- fprintf(stderr, "%s\n", "stderr");
+ printf("%s\\n", "stdout");
+ fprintf(stderr, "%s\\n", "stderr");
return 0;
}
'''
diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build
index a43de70..1c289eb 100644
--- a/test cases/common/42 string operations/meson.build
+++ b/test cases/common/42 string operations/meson.build
@@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped')
assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped')
assert('"1.1.20" '.strip('" ') == '1.1.20', '". badly stripped')
-bs_b = '''\b'''
-bs_bs_b = '''\\b'''
+bs_c = '''\c'''
+bs_bs_c = '''\\\c'''
nl = '''
'''
-bs_n = '''\n'''
+bs_n = '''\\n'''
bs_nl = '''\
'''
-bs_bs_n = '''\\n'''
-bs_bs_nl = '''\\
+bs_bs_n = '''\\\\n'''
+bs_bs_nl = '''\\\\
'''
-assert('\b' == bs_b, 'Single backslash broken')
-assert('\\b' == bs_b, 'Double backslash broken')
-assert('\\\b' == bs_bs_b, 'Three backslash broken')
-assert('\\\\b' == bs_bs_b, 'Four backslash broken')
+assert('\c' == bs_c, 'Single backslash broken')
+assert('\\c' == bs_c, 'Double backslash broken')
+assert('\\\c' == bs_bs_c, 'Three backslash broken')
+assert('\\\\c' == bs_bs_c, 'Four backslash broken')
assert('\n' == nl, 'Newline escape broken')
assert('\\n' == bs_n, 'Double backslash broken before n')
assert('\\\n' == bs_nl, 'Three backslash broken before n')