diff options
-rw-r--r-- | docs/markdown/Syntax.md | 20 | ||||
-rw-r--r-- | docs/markdown/snippets/more-escape-sequences.md | 17 | ||||
-rw-r--r-- | mesonbuild/backend/ninjabackend.py | 6 | ||||
-rw-r--r-- | mesonbuild/backend/vs2010backend.py | 6 | ||||
-rw-r--r-- | mesonbuild/compilers/c.py | 7 | ||||
-rw-r--r-- | mesonbuild/mparser.py | 48 | ||||
-rw-r--r-- | test cases/common/112 has arg/meson.build | 8 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/file.c.in | 5 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/file.py | 10 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/find.py | 9 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/fun.c | 3 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/main.c | 12 | ||||
-rw-r--r-- | test cases/common/190 escape and unicode/meson.build | 24 | ||||
-rw-r--r-- | test cases/common/33 try compile/meson.build | 4 | ||||
-rw-r--r-- | test cases/common/39 tryrun/meson.build | 4 | ||||
-rw-r--r-- | test cases/common/42 string operations/meson.build | 18 | ||||
-rw-r--r-- | test cases/failing/72 invalid escape char/meson.build | 4 |
17 files changed, 178 insertions, 27 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md index 1005100..01c8c6e 100644 --- a/docs/markdown/Syntax.md +++ b/docs/markdown/Syntax.md @@ -90,8 +90,24 @@ single quote do it like this: single quote = 'contains a \' character' ``` -Similarly `\n` gets converted to a newline and `\\` to a single -backslash. +The full list of escape sequences is: + +* `\\` Backslash +* `\'` Single quote +* `\a` Bell +* `\b` Backspace +* `\f` Formfeed +* `\n` Newline +* `\r` Carriage Return +* `\t` Horizontal Tab +* `\v` Vertical Tab +* `\ooo` Character with octal value ooo +* `\xhh` Character with hex value hh +* `\uxxxx` Character with 16-bit hex value xxxx +* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx +* `\N{name}` Character named name in Unicode database + +As in Python and C, up to three octal digits are accepted in `\ooo`. #### String concatenation diff --git a/docs/markdown/snippets/more-escape-sequences.md b/docs/markdown/snippets/more-escape-sequences.md new file mode 100644 index 0000000..2894079 --- /dev/null +++ b/docs/markdown/snippets/more-escape-sequences.md @@ -0,0 +1,17 @@ +## String escape character update + +The strings (both single-quoted and triple-quoted) in Meson have been taught the +same set of escape sequences as in Python. It is therefore now possible to use +arbitrary bytes in strings, like for example NUL (`\0`) and other ASCII control +characters. See the chapter about *Strings* in *Syntax* for more details. + +Potential backwards compatibility issue: Any valid escape sequence according to +the new rules will be interpreted as an escape sequence instead of the literal +characters. Previously only single-quote strings supported escape sequences and +the supported sequences were `\'`, `\\` and `\n`. + +The most likely breakage is usage of backslash-n in triple-quoted strings. It +is now written in the same way as in single-quoted strings: `\\n` instead of +`\n`. In general it is now recommended to escape any usage of backslash. 
+However, backslash-c (`\c`), for example, is still backslash-c because it isn't +a valid escape sequence. diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py index 7bdd2d1..bc3a8ef 100644 --- a/mesonbuild/backend/ninjabackend.py +++ b/mesonbuild/backend/ninjabackend.py @@ -169,7 +169,7 @@ class NinjaBackend(backends.Backend): break else: # None of our compilers are MSVC, we're done. - return open(tempfilename, 'a') + return open(tempfilename, 'a', encoding='utf-8') filename = os.path.join(self.environment.get_scratch_dir(), 'incdetect.c') with open(filename, 'w') as f: @@ -196,7 +196,7 @@ int dummy; if match: with open(tempfilename, 'ab') as binfile: binfile.write(b'msvc_deps_prefix = ' + match.group(1) + b'\n') - return open(tempfilename, 'a') + return open(tempfilename, 'a', encoding='utf-8') raise MesonException('Could not determine vs dep dependency prefix string.') def generate(self, interp): @@ -206,7 +206,7 @@ int dummy; raise MesonException('Could not detect Ninja v1.5 or newer') outfilename = os.path.join(self.environment.get_build_dir(), self.ninja_filename) tempfilename = outfilename + '~' - with open(tempfilename, 'w') as outfile: + with open(tempfilename, 'w', encoding='utf-8') as outfile: outfile.write('# This is the build file for project "%s"\n' % self.build.get_project()) outfile.write('# It is autogenerated by the Meson build system.\n') diff --git a/mesonbuild/backend/vs2010backend.py b/mesonbuild/backend/vs2010backend.py index 4959e6e..22383dc 100644 --- a/mesonbuild/backend/vs2010backend.py +++ b/mesonbuild/backend/vs2010backend.py @@ -227,7 +227,7 @@ class Vs2010Backend(backends.Backend): def generate_solution(self, sln_filename, projlist): default_projlist = self.get_build_by_default_targets() - with open(sln_filename, 'w') as ofile: + with open(sln_filename, 'w', encoding='utf-8') as ofile: ofile.write('Microsoft Visual Studio Solution File, Format ' 'Version 11.00\n') ofile.write('# Visual Studio ' + 
self.vs_version + '\n') @@ -575,7 +575,7 @@ class Vs2010Backend(backends.Backend): tree.write(ofname, encoding='utf-8', xml_declaration=True) # ElementTree can not do prettyprinting so do it manually doc = xml.dom.minidom.parse(ofname) - with open(ofname, 'w') as of: + with open(ofname, 'w', encoding='utf-8') as of: of.write(doc.toprettyxml()) def gen_vcxproj(self, target, ofname, guid): @@ -1128,7 +1128,7 @@ if %%errorlevel%% neq 0 goto :VCEnd''' igroup = ET.SubElement(root, 'ItemGroup') rulefile = os.path.join(self.environment.get_scratch_dir(), 'regen.rule') if not os.path.exists(rulefile): - with open(rulefile, 'w') as f: + with open(rulefile, 'w', encoding='utf-8') as f: f.write("# Meson regen file.") custombuild = ET.SubElement(igroup, 'CustomBuild', Include=rulefile) message = ET.SubElement(custombuild, 'Message') diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py index 279e435..0e474e7 100644 --- a/mesonbuild/compilers/c.py +++ b/mesonbuild/compilers/c.py @@ -842,7 +842,12 @@ class CCompiler(Compiler): return ['-pthread'] def has_multi_arguments(self, args, env): - for arg in args: + for arg in args[:]: + # some compilers, e.g. GCC, don't warn for unsupported warning-disable + # flags, so when we are testing a flag like "-Wno-forgotten-towel", also + # check the equivalent enable flag too "-Wforgotten-towel" + if arg.startswith('-Wno-'): + args.append('-W' + arg[5:]) if arg.startswith('-Wl,'): mlog.warning('''{} looks like a linker argument, but has_argument and other similar methods only support checking compiler arguments. diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py index 0e7524c..9e43065 100644 --- a/mesonbuild/mparser.py +++ b/mesonbuild/mparser.py @@ -13,9 +13,44 @@ # limitations under the License. import re +import codecs from .mesonlib import MesonException from . 
import mlog +# This is the regex for the supported escape sequences of a regular string +# literal, like 'abc\x00' +ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\'abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +# This is the regex for the supported escape sequences of a multiline string +# literal, like '''abc\x00'''. The only difference is that single quote (') +# doesn't require escaping. +ESCAPE_SEQUENCE_MULTI_RE = re.compile(r''' + ( \\U........ # 8-digit hex escapes + | \\u.... # 4-digit hex escapes + | \\x.. # 2-digit hex escapes + | \\[0-7]{1,3} # Octal escapes + | \\N\{[^}]+\} # Unicode characters by name + | \\[\\abfnrtv] # Single-character escapes + )''', re.UNICODE | re.VERBOSE) + +class MesonUnicodeDecodeError(MesonException): + def __init__(self, match): + super().__init__("%s" % match) + self.match = match + +def decode_match(match): + try: + return codecs.decode(match.group(0), 'unicode_escape') + except UnicodeDecodeError as err: + raise MesonUnicodeDecodeError(match.group(0)) + class ParseException(MesonException): def __init__(self, text, line, lineno, colno): # Format as error message, followed by the line with the error, followed by a caret to show the error column. @@ -112,7 +147,6 @@ class Lexer: par_count = 0 bracket_count = 0 col = 0 - newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n') while loc < len(self.code): matched = False value = None @@ -145,12 +179,18 @@ class Lexer: if match_text.find("\n") != -1: mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead. 
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col) - value = match_text[1:-1].replace(r"\'", "'") - value = newline_rx.sub(r'\1\n', value) - value = value.replace(r" \\ ".strip(), r" \ ".strip()) + value = match_text[1:-1] + try: + value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value) + except MesonUnicodeDecodeError as err: + raise MesonException("Failed to parse escape sequence: '{}' in string:\n {}".format(err.match, match_text)) elif tid == 'multiline_string': tid = 'string' value = match_text[3:-3] + try: + value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value) + except MesonUnicodeDecodeError as err: + raise MesonException("Failed to parse escape sequence: '{}' in string:\n{}".format(err.match, match_text)) lines = match_text.split('\n') if len(lines) > 1: lineno += len(lines) - 1 diff --git a/test cases/common/112 has arg/meson.build b/test cases/common/112 has arg/meson.build index 27290a1..ba07311 100644 --- a/test cases/common/112 has arg/meson.build +++ b/test cases/common/112 has arg/meson.build @@ -39,11 +39,17 @@ assert(l2.length() == 0, 'First supported did not return empty array.') if cc.get_id() == 'gcc' pre_arg = '-Wformat' - anti_pre_arg = '-Wno-format' + # NOTE: We have special handling for -Wno-foo args because gcc silently + # ignores unknown -Wno-foo args unless you pass -Werror, so for this test, we + # pass it as two separate arguments. 
+ anti_pre_arg = ['-W', 'no-format'] arg = '-Werror=format-security' assert(not cc.has_multi_arguments([anti_pre_arg, arg]), 'Arg that should be broken is not.') assert(cc.has_multi_arguments(pre_arg), 'Arg that should have worked does not work.') assert(cc.has_multi_arguments([pre_arg, arg]), 'Arg that should have worked does not work.') + # Test that gcc correctly errors out on unknown -Wno flags + assert(not cc.has_argument('-Wno-lol-meson-test-flags'), 'should error out on unknown -Wno args') + assert(not cc.has_multi_arguments(['-Wno-pragmas', '-Wno-lol-meson-test-flags']), 'should error out even if some -Wno args are valid') endif if cc.get_id() == 'clang' and cc.version().version_compare('<=4.0.0') diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in new file mode 100644 index 0000000..413ed42 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.c.in @@ -0,0 +1,5 @@ +#include<stdio.h> +const char* does_it_work() { + printf("{NAME}\n"); + return "yes it does"; +} diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py new file mode 100644 index 0000000..af67a09 --- /dev/null +++ b/test cases/common/190 escape and unicode/file.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 + +import sys +import os + +with open(sys.argv[1]) as fh: + content = fh.read().replace("{NAME}", sys.argv[2]) + +with open(os.path.join(sys.argv[3]), 'w') as fh: + fh.write(content) diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py new file mode 100644 index 0000000..34a3eb8 --- /dev/null +++ b/test cases/common/190 escape and unicode/find.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 + +import os +import sys + +for fh in os.listdir('.'): + if os.path.isfile(fh): + if fh.endswith('.c'): + sys.stdout.write(fh + '\0') diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 
escape and unicode/fun.c new file mode 100644 index 0000000..8eeb8ea --- /dev/null +++ b/test cases/common/190 escape and unicode/fun.c @@ -0,0 +1,3 @@ +int a_fun() { + return 1; +} diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c new file mode 100644 index 0000000..0bcde16 --- /dev/null +++ b/test cases/common/190 escape and unicode/main.c @@ -0,0 +1,12 @@ +#include <string.h> + +const char* does_it_work(); + +int a_fun(); + +int main() { + if(strcmp(does_it_work(), "yes it does") != 0) { + return -a_fun(); + } + return 0; +} diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build new file mode 100644 index 0000000..65377b6 --- /dev/null +++ b/test cases/common/190 escape and unicode/meson.build @@ -0,0 +1,24 @@ +project('180 escape', 'c') + +gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@') + +gen_file = gen.process('file.c.in') + +find_file_list = run_command(find_program('find.py')) +assert(find_file_list.returncode() == 0, 'Didn\'t find any files.') + +# Strings should support both octal \ooo and hex \xhh encodings + +found_files_oct = [] +foreach l : find_file_list.stdout().strip('\0').split('\000') + found_files_oct += [files(l)] +endforeach + +test('first', executable('first', found_files_oct + [gen_file])) + +found_files_hex = [] +foreach l : find_file_list.stdout().strip('\x00').split('\x00') + found_files_hex += [files(l)] +endforeach + +test('second', executable('second', found_files_hex + [gen_file])) diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build index 09ca395..cb1037d 100644 --- a/test cases/common/33 try compile/meson.build +++ b/test cases/common/33 try compile/meson.build @@ -1,11 +1,11 @@ project('try compile', 'c', 'cpp') code = '''#include<stdio.h> -void func() { printf("Something.\n"); } +void func() { 
printf("Something.\\n"); } ''' breakcode = '''#include<nonexisting.h> -void func() { printf("This won't work.\n"); } +void func() { printf("This won't work.\\n"); } ''' foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')] diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build index c64446f..daf5be7 100644 --- a/test cases/common/39 tryrun/meson.build +++ b/test cases/common/39 tryrun/meson.build @@ -13,8 +13,8 @@ endif ok_code = '''#include<stdio.h> int main(int argc, char **argv) { - printf("%s\n", "stdout"); - fprintf(stderr, "%s\n", "stderr"); + printf("%s\\n", "stdout"); + fprintf(stderr, "%s\\n", "stderr"); return 0; } ''' diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build index a43de70..1c289eb 100644 --- a/test cases/common/42 string operations/meson.build +++ b/test cases/common/42 string operations/meson.build @@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped') assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped') assert('"1.1.20" '.strip('" ') == '1.1.20', '". 
badly stripped') -bs_b = '''\b''' -bs_bs_b = '''\\b''' +bs_c = '''\c''' +bs_bs_c = '''\\\c''' nl = ''' ''' -bs_n = '''\n''' +bs_n = '''\\n''' bs_nl = '''\ ''' -bs_bs_n = '''\\n''' -bs_bs_nl = '''\\ +bs_bs_n = '''\\\\n''' +bs_bs_nl = '''\\\\ ''' -assert('\b' == bs_b, 'Single backslash broken') -assert('\\b' == bs_b, 'Double backslash broken') -assert('\\\b' == bs_bs_b, 'Three backslash broken') -assert('\\\\b' == bs_bs_b, 'Four backslash broken') +assert('\c' == bs_c, 'Single backslash broken') +assert('\\c' == bs_c, 'Double backslash broken') +assert('\\\c' == bs_bs_c, 'Three backslash broken') +assert('\\\\c' == bs_bs_c, 'Four backslash broken') assert('\n' == nl, 'Newline escape broken') assert('\\n' == bs_n, 'Double backslash broken before n') assert('\\\n' == bs_nl, 'Three backslash broken before n') diff --git a/test cases/failing/72 invalid escape char/meson.build b/test cases/failing/72 invalid escape char/meson.build new file mode 100644 index 0000000..b4e9196 --- /dev/null +++ b/test cases/failing/72 invalid escape char/meson.build @@ -0,0 +1,4 @@ +# Make sure meson exits on invalid string +# The string below contains an invalid unicode code point + +'my name is what \uxyzo who are you' |