diff options
-rw-r--r-- | docs/markdown/Syntax.md | 20 | ||||
-rw-r--r-- | mesonbuild/mparser.py | 34 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/file.c.in | 5 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/file.py | 10 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/find.py | 9 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/fun.c | 3 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/main.c | 12 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/meson.build | 25 | ||||
-rw-r--r-- | test cases/common/33 try compile/meson.build | 4 | ||||
-rw-r--r-- | test cases/common/39 tryrun/meson.build | 4 | ||||
-rw-r--r-- | test cases/common/42 string operations/meson.build | 18 |
11 files changed, 125 insertions, 19 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md index 1005100..01c8c6e 100644 --- a/docs/markdown/Syntax.md +++ b/docs/markdown/Syntax.md @@ -90,8 +90,24 @@ single quote do it like this: single quote = 'contains a \' character' ``` -Similarly `\n` gets converted to a newline and `\\` to a single -backslash. +The full list of escape sequences is: + +* `\\` Backslash +* `\'` Single quote +* `\a` Bell +* `\b` Backspace +* `\f` Formfeed +* `\n` Newline +* `\r` Carriage Return +* `\t` Horizontal Tab +* `\v` Vertical Tab +* `\ooo` Character with octal value ooo +* `\xhh` Character with hex value hh +* `\uxxxx` Character with 16-bit hex value xxxx +* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx +* `\N{name}` Character named name in Unicode database + +As in python and C, up to three octal digits are accepted in `\ooo`. #### String concatenation diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index 0e7524c..bf7c271 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -13,9 +13,36 @@ # limitations under the License. import re +import codecs from .mesonlib import MesonException from . import mlog +# This is the regex for the supported escape sequences of a regular string +# literal, like 'abc\x00' +ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +# This is the regex for the supported escape sequences of a multiline string +# literal, like '''abc\x00'''. The only difference is that single quote (') +# doesn't require escaping. +ESCAPE_SEQUENCE_MULTI_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +def decode_match(match): + return codecs.decode(match.group(0), 'unicode_escape') + class ParseException(MesonException): def __init__(self, text, line, lineno, colno): # Format as error message, followed by the line with the error, followed by a caret to show the error column. @@ -112,7 +139,6 @@ class Lexer: par_count = 0 bracket_count = 0 col = 0 - newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n') while loc < len(self.code): matched = False value = None @@ -145,12 +171,12 @@ class Lexer: if match_text.find("\n") != -1: mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead. This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col) - value = match_text[1:-1].replace(r"\'", "'") - value = newline_rx.sub(r'\1\n', value) - value = value.replace(r" \\ ".strip(), r" \ ".strip()) + value = match_text[1:-1] + value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value) elif tid == 'multiline_string': tid = 'string' value = match_text[3:-3] + value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value) lines = match_text.split('\n') if len(lines) > 1: lineno += len(lines) - 1 diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in new file mode 100644 index 0000000..413ed42 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.c.in @@ -0,0 +1,5 @@ +#include<stdio.h> +const char* does_it_work() { + printf("{NAME}\n"); + return "yes it does"; +} diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py new file mode 100644 index 0000000..af67a09 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 + +import sys +import os + +with open(sys.argv[1]) as fh: + content = fh.read().replace("{NAME}", sys.argv[2]) + +with open(os.path.join(sys.argv[3]), 'w') as fh: + fh.write(content) diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py new file mode 100644 index 0000000..34a3eb8 --- /dev/null +++ b/test cases/common/190 escape and unicode/find.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 + +import os +import sys + +for fh in os.listdir('.'): + if os.path.isfile(fh): + if fh.endswith('.c'): + sys.stdout.write(fh + '\0') diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 escape and unicode/fun.c new file mode 100644 index 0000000..8eeb8ea --- /dev/null +++ b/test cases/common/190 escape and unicode/fun.c @@ -0,0 +1,3 @@ +int a_fun() { + return 1; +} diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c new file mode 100644 index 0000000..0bcde16 --- /dev/null +++ b/test cases/common/190 escape and unicode/main.c @@ -0,0 +1,12 @@ +#include <string.h> + +const char* does_it_work(); + +int a_fun(); + +int main() { + if(strcmp(does_it_work(), "yes it does") != 0) { + return -a_fun(); + } + return 0; +} diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build new file mode 100644 index 0000000..be11073 --- /dev/null +++ b/test cases/common/190 escape and unicode/meson.build @@ -0,0 +1,25 @@ +project('180 escape', 'c') + +gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@') + +gen_file = gen.process('file.c.in') + +find_file_list = run_command(find_program('find.py')) +assert(find_file_list.returncode() == 0, 'Didn\'t find any files.') + +# Strings should support both octal \ooo and hex \xhh encodings + +found_files_oct = [] +foreach l : find_file_list.stdout().strip('\0').split('\000') + found_files_oct += [files(l)] +endforeach + +test('first', executable('first', found_files_oct + [gen_file])) + +found_files_hex = [] +foreach l : find_file_list.stdout().strip('\x00').split('\x00') + found_files_hex += [files(l)] +endforeach + +test('second', executable('second', found_files_hex + [gen_file])) + diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build index 09ca395..cb1037d 100644 --- a/test cases/common/33 try compile/meson.build +++ b/test cases/common/33 try compile/meson.build @@ -1,11 +1,11 @@ project('try compile', 'c', 'cpp') code = '''#include<stdio.h> -void func() { printf("Something.\n"); } +void func() { printf("Something.\\n"); } ''' breakcode = '''#include<nonexisting.h> -void func() { printf("This won't work.\n"); } +void func() { printf("This won't work.\\n"); } ''' foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')] diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build index c64446f..daf5be7 100644 --- a/test cases/common/39 tryrun/meson.build +++ b/test cases/common/39 tryrun/meson.build @@ -13,8 +13,8 @@ endif ok_code = '''#include<stdio.h> int main(int argc, char **argv) { - printf("%s\n", "stdout"); - fprintf(stderr, "%s\n", "stderr"); + printf("%s\\n", "stdout"); + fprintf(stderr, "%s\\n", "stderr"); return 0; } ''' diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build index a43de70..1c289eb 100644 --- a/test cases/common/42 string operations/meson.build +++ b/test cases/common/42 string operations/meson.build @@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped') assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped') assert('"1.1.20" '.strip('" ') == '1.1.20', '". badly stripped') -bs_b = '''\b''' -bs_bs_b = '''\\b''' +bs_c = '''\c''' +bs_bs_c = '''\\\c''' nl = ''' ''' -bs_n = '''\n''' +bs_n = '''\\n''' bs_nl = '''\ ''' -bs_bs_n = '''\\n''' -bs_bs_nl = '''\\ +bs_bs_n = '''\\\\n''' +bs_bs_nl = '''\\\\ ''' -assert('\b' == bs_b, 'Single backslash broken') -assert('\\b' == bs_b, 'Double backslash broken') -assert('\\\b' == bs_bs_b, 'Three backslash broken') -assert('\\\\b' == bs_bs_b, 'Four backslash broken') +assert('\c' == bs_c, 'Single backslash broken') +assert('\\c' == bs_c, 'Double backslash broken') +assert('\\\c' == bs_bs_c, 'Three backslash broken') +assert('\\\\c' == bs_bs_c, 'Four backslash broken') assert('\n' == nl, 'Newline escape broken') assert('\\n' == bs_n, 'Double backslash broken before n') assert('\\\n' == bs_nl, 'Three backslash broken before n') |