aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--docs/markdown/Syntax.md20
-rw-r--r--docs/markdown/snippets/more-escape-sequences.md17
-rw-r--r--mesonbuild/backend/ninjabackend.py6
-rw-r--r--mesonbuild/backend/vs2010backend.py6
-rw-r--r--mesonbuild/compilers/c.py7
-rw-r--r--mesonbuild/mparser.py48
-rw-r--r--test cases/common/112 has arg/meson.build8
-rw-r--r--test cases/common/190 escape and unicode/file.c.in5
-rw-r--r--test cases/common/190 escape and unicode/file.py10
-rw-r--r--test cases/common/190 escape and unicode/find.py9
-rw-r--r--test cases/common/190 escape and unicode/fun.c3
-rw-r--r--test cases/common/190 escape and unicode/main.c12
-rw-r--r--test cases/common/190 escape and unicode/meson.build24
-rw-r--r--test cases/common/33 try compile/meson.build4
-rw-r--r--test cases/common/39 tryrun/meson.build4
-rw-r--r--test cases/common/42 string operations/meson.build18
-rw-r--r--test cases/failing/72 invalid escape char/meson.build4
17 files changed, 178 insertions, 27 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md
index 1005100..01c8c6e 100644
--- a/docs/markdown/Syntax.md
+++ b/docs/markdown/Syntax.md
@@ -90,8 +90,24 @@ single quote do it like this:
single quote = 'contains a \' character'
```
-Similarly `\n` gets converted to a newline and `\\` to a single
-backslash.
+The full list of escape sequences is:
+
+* `\\` Backslash
+* `\'` Single quote
+* `\a` Bell
+* `\b` Backspace
+* `\f` Formfeed
+* `\n` Newline
+* `\r` Carriage Return
+* `\t` Horizontal Tab
+* `\v` Vertical Tab
+* `\ooo` Character with octal value ooo
+* `\xhh` Character with hex value hh
+* `\uxxxx` Character with 16-bit hex value xxxx
+* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx
+* `\N{name}` Character named name in Unicode database
+
+As in Python and C, up to three octal digits are accepted in `\ooo`.
#### String concatenation
diff --git a/docs/markdown/snippets/more-escape-sequences.md b/docs/markdown/snippets/more-escape-sequences.md
new file mode 100644
index 0000000..2894079
--- /dev/null
+++ b/docs/markdown/snippets/more-escape-sequences.md
@@ -0,0 +1,17 @@
+## String escape character update
+
+Strings (both single-quoted and triple-quoted) in Meson now support the same
+set of escape sequences as Python. It is therefore now possible to use
+arbitrary bytes in strings, for example NUL (`\0`) and other ASCII control
+characters. See the chapter about *Strings* in *Syntax* for more details.
+
+Potential backwards compatibility issue: Any valid escape sequence according to
+the new rules will be interpreted as an escape sequence instead of the literal
+characters. Previously only single-quoted strings supported escape sequences,
+and the supported sequences were `\'`, `\\` and `\n`.
+
+The most likely breakage is usage of backslash-n in triple-quoted strings. A
+literal backslash followed by `n` is now written in the same way as in
+single-quoted strings: `\\n` instead of `\n`. In general it is now recommended
+to escape any usage of backslash. However, backslash-c (`\c`), for example, is
+still backslash-c because it isn't a valid escape sequence.
diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index 7bdd2d1..bc3a8ef 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -169,7 +169,7 @@ class NinjaBackend(backends.Backend):
break
else:
# None of our compilers are MSVC, we're done.
- return open(tempfilename, 'a')
+ return open(tempfilename, 'a', encoding='utf-8')
filename = os.path.join(self.environment.get_scratch_dir(),
'incdetect.c')
with open(filename, 'w') as f:
@@ -196,7 +196,7 @@ int dummy;
if match:
with open(tempfilename, 'ab') as binfile:
binfile.write(b'msvc_deps_prefix = ' + match.group(1) + b'\n')
- return open(tempfilename, 'a')
+ return open(tempfilename, 'a', encoding='utf-8')
raise MesonException('Could not determine vs dep dependency prefix string.')
def generate(self, interp):
@@ -206,7 +206,7 @@ int dummy;
raise MesonException('Could not detect Ninja v1.5 or newer')
outfilename = os.path.join(self.environment.get_build_dir(), self.ninja_filename)
tempfilename = outfilename + '~'
- with open(tempfilename, 'w') as outfile:
+ with open(tempfilename, 'w', encoding='utf-8') as outfile:
outfile.write('# This is the build file for project "%s"\n' %
self.build.get_project())
outfile.write('# It is autogenerated by the Meson build system.\n')
diff --git a/mesonbuild/backend/vs2010backend.py b/mesonbuild/backend/vs2010backend.py
index 4959e6e..22383dc 100644
--- a/mesonbuild/backend/vs2010backend.py
+++ b/mesonbuild/backend/vs2010backend.py
@@ -227,7 +227,7 @@ class Vs2010Backend(backends.Backend):
def generate_solution(self, sln_filename, projlist):
default_projlist = self.get_build_by_default_targets()
- with open(sln_filename, 'w') as ofile:
+ with open(sln_filename, 'w', encoding='utf-8') as ofile:
ofile.write('Microsoft Visual Studio Solution File, Format '
'Version 11.00\n')
ofile.write('# Visual Studio ' + self.vs_version + '\n')
@@ -575,7 +575,7 @@ class Vs2010Backend(backends.Backend):
tree.write(ofname, encoding='utf-8', xml_declaration=True)
# ElementTree can not do prettyprinting so do it manually
doc = xml.dom.minidom.parse(ofname)
- with open(ofname, 'w') as of:
+ with open(ofname, 'w', encoding='utf-8') as of:
of.write(doc.toprettyxml())
def gen_vcxproj(self, target, ofname, guid):
@@ -1128,7 +1128,7 @@ if %%errorlevel%% neq 0 goto :VCEnd'''
igroup = ET.SubElement(root, 'ItemGroup')
rulefile = os.path.join(self.environment.get_scratch_dir(), 'regen.rule')
if not os.path.exists(rulefile):
- with open(rulefile, 'w') as f:
+ with open(rulefile, 'w', encoding='utf-8') as f:
f.write("# Meson regen file.")
custombuild = ET.SubElement(igroup, 'CustomBuild', Include=rulefile)
message = ET.SubElement(custombuild, 'Message')
diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py
index 279e435..0e474e7 100644
--- a/mesonbuild/compilers/c.py
+++ b/mesonbuild/compilers/c.py
@@ -842,7 +842,12 @@ class CCompiler(Compiler):
return ['-pthread']
def has_multi_arguments(self, args, env):
- for arg in args:
+ for arg in args[:]:
+ # Some compilers, e.g. GCC, don't warn for unsupported warning-disable
+ # flags, so when we are testing a flag like "-Wno-forgotten-towel", also
+ # check the equivalent enable flag, "-Wforgotten-towel".
+ if arg.startswith('-Wno-'):
+ args.append('-W' + arg[5:])
if arg.startswith('-Wl,'):
mlog.warning('''{} looks like a linker argument, but has_argument
and other similar methods only support checking compiler arguments.
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 0e7524c..9e43065 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -13,9 +13,44 @@
# limitations under the License.
import re
+import codecs
from .mesonlib import MesonException
from . import mlog
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\'abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+# This is the regex for the supported escape sequences of a multiline string
+# literal, like '''abc\x00'''. The only difference is that single quote (')
+# doesn't require escaping.
+ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
+ ( \\U........ # 8-digit hex escapes
+ | \\u.... # 4-digit hex escapes
+ | \\x.. # 2-digit hex escapes
+ | \\[0-7]{1,3} # Octal escapes
+ | \\N\{[^}]+\} # Unicode characters by name
+ | \\[\\abfnrtv] # Single-character escapes
+ )''', re.UNICODE | re.VERBOSE)
+
+class MesonUnicodeDecodeError(MesonException):
+ def __init__(self, match):
+ super().__init__("%s" % match)
+ self.match = match
+
+def decode_match(match):
+ try:
+ return codecs.decode(match.group(0), 'unicode_escape')
+ except UnicodeDecodeError as err:
+ raise MesonUnicodeDecodeError(match.group(0))
+
class ParseException(MesonException):
def __init__(self, text, line, lineno, colno):
# Format as error message, followed by the line with the error, followed by a caret to show the error column.
@@ -112,7 +147,6 @@ class Lexer:
par_count = 0
bracket_count = 0
col = 0
- newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
while loc < len(self.code):
matched = False
value = None
@@ -145,12 +179,18 @@ class Lexer:
if match_text.find("\n") != -1:
mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
- value = match_text[1:-1].replace(r"\'", "'")
- value = newline_rx.sub(r'\1\n', value)
- value = value.replace(r" \\ ".strip(), r" \ ".strip())
+ value = match_text[1:-1]
+ try:
+ value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
+ except MesonUnicodeDecodeError as err:
+ raise MesonException("Failed to parse escape sequence: '{}' in string:\n {}".format(err.match, match_text))
elif tid == 'multiline_string':
tid = 'string'
value = match_text[3:-3]
+ try:
+ value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
+ except MesonUnicodeDecodeError as err:
+ raise MesonException("Failed to parse escape sequence: '{}' in string:\n{}".format(err.match, match_text))
lines = match_text.split('\n')
if len(lines) > 1:
lineno += len(lines) - 1
diff --git a/test cases/common/112 has arg/meson.build b/test cases/common/112 has arg/meson.build
index 27290a1..ba07311 100644
--- a/test cases/common/112 has arg/meson.build
+++ b/test cases/common/112 has arg/meson.build
@@ -39,11 +39,17 @@ assert(l2.length() == 0, 'First supported did not return empty array.')
if cc.get_id() == 'gcc'
pre_arg = '-Wformat'
- anti_pre_arg = '-Wno-format'
+ # NOTE: We have special handling for -Wno-foo args because gcc silently
+ # ignores unknown -Wno-foo args unless you pass -Werror, so for this test, we
+ # pass it as two separate arguments.
+ anti_pre_arg = ['-W', 'no-format']
arg = '-Werror=format-security'
assert(not cc.has_multi_arguments([anti_pre_arg, arg]), 'Arg that should be broken is not.')
assert(cc.has_multi_arguments(pre_arg), 'Arg that should have worked does not work.')
assert(cc.has_multi_arguments([pre_arg, arg]), 'Arg that should have worked does not work.')
+ # Test that gcc correctly errors out on unknown -Wno flags
+ assert(not cc.has_argument('-Wno-lol-meson-test-flags'), 'should error out on unknown -Wno args')
+ assert(not cc.has_multi_arguments(['-Wno-pragmas', '-Wno-lol-meson-test-flags']), 'should error out even if some -Wno args are valid')
endif
if cc.get_id() == 'clang' and cc.version().version_compare('<=4.0.0')
diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in
new file mode 100644
index 0000000..413ed42
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.c.in
@@ -0,0 +1,5 @@
+#include<stdio.h>
+const char* does_it_work() {
+ printf("{NAME}\n");
+ return "yes it does";
+}
diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py
new file mode 100644
index 0000000..af67a09
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+
+with open(sys.argv[1]) as fh:
+ content = fh.read().replace("{NAME}", sys.argv[2])
+
+with open(os.path.join(sys.argv[3]), 'w') as fh:
+ fh.write(content)
diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py
new file mode 100644
index 0000000..34a3eb8
--- /dev/null
+++ b/test cases/common/190 escape and unicode/find.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+for fh in os.listdir('.'):
+ if os.path.isfile(fh):
+ if fh.endswith('.c'):
+ sys.stdout.write(fh + '\0')
diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 escape and unicode/fun.c
new file mode 100644
index 0000000..8eeb8ea
--- /dev/null
+++ b/test cases/common/190 escape and unicode/fun.c
@@ -0,0 +1,3 @@
+int a_fun() {
+ return 1;
+}
diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c
new file mode 100644
index 0000000..0bcde16
--- /dev/null
+++ b/test cases/common/190 escape and unicode/main.c
@@ -0,0 +1,12 @@
+#include <string.h>
+
+const char* does_it_work();
+
+int a_fun();
+
+int main() {
+ if(strcmp(does_it_work(), "yes it does") != 0) {
+ return -a_fun();
+ }
+ return 0;
+}
diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build
new file mode 100644
index 0000000..65377b6
--- /dev/null
+++ b/test cases/common/190 escape and unicode/meson.build
@@ -0,0 +1,24 @@
+project('180 escape', 'c')
+
+gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@')
+
+gen_file = gen.process('file.c.in')
+
+find_file_list = run_command(find_program('find.py'))
+assert(find_file_list.returncode() == 0, 'Didn\'t find any files.')
+
+# Strings should support both octal \ooo and hex \xhh encodings
+
+found_files_oct = []
+foreach l : find_file_list.stdout().strip('\0').split('\000')
+ found_files_oct += [files(l)]
+endforeach
+
+test('first', executable('first', found_files_oct + [gen_file]))
+
+found_files_hex = []
+foreach l : find_file_list.stdout().strip('\x00').split('\x00')
+ found_files_hex += [files(l)]
+endforeach
+
+test('second', executable('second', found_files_hex + [gen_file]))
diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build
index 09ca395..cb1037d 100644
--- a/test cases/common/33 try compile/meson.build
+++ b/test cases/common/33 try compile/meson.build
@@ -1,11 +1,11 @@
project('try compile', 'c', 'cpp')
code = '''#include<stdio.h>
-void func() { printf("Something.\n"); }
+void func() { printf("Something.\\n"); }
'''
breakcode = '''#include<nonexisting.h>
-void func() { printf("This won't work.\n"); }
+void func() { printf("This won't work.\\n"); }
'''
foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')]
diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build
index c64446f..daf5be7 100644
--- a/test cases/common/39 tryrun/meson.build
+++ b/test cases/common/39 tryrun/meson.build
@@ -13,8 +13,8 @@ endif
ok_code = '''#include<stdio.h>
int main(int argc, char **argv) {
- printf("%s\n", "stdout");
- fprintf(stderr, "%s\n", "stderr");
+ printf("%s\\n", "stdout");
+ fprintf(stderr, "%s\\n", "stderr");
return 0;
}
'''
diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build
index a43de70..1c289eb 100644
--- a/test cases/common/42 string operations/meson.build
+++ b/test cases/common/42 string operations/meson.build
@@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped')
assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped')
assert('"1.1.20" '.strip('" ') == '1.1.20', '". badly stripped')
-bs_b = '''\b'''
-bs_bs_b = '''\\b'''
+bs_c = '''\c'''
+bs_bs_c = '''\\\c'''
nl = '''
'''
-bs_n = '''\n'''
+bs_n = '''\\n'''
bs_nl = '''\
'''
-bs_bs_n = '''\\n'''
-bs_bs_nl = '''\\
+bs_bs_n = '''\\\\n'''
+bs_bs_nl = '''\\\\
'''
-assert('\b' == bs_b, 'Single backslash broken')
-assert('\\b' == bs_b, 'Double backslash broken')
-assert('\\\b' == bs_bs_b, 'Three backslash broken')
-assert('\\\\b' == bs_bs_b, 'Four backslash broken')
+assert('\c' == bs_c, 'Single backslash broken')
+assert('\\c' == bs_c, 'Double backslash broken')
+assert('\\\c' == bs_bs_c, 'Three backslash broken')
+assert('\\\\c' == bs_bs_c, 'Four backslash broken')
assert('\n' == nl, 'Newline escape broken')
assert('\\n' == bs_n, 'Double backslash broken before n')
assert('\\\n' == bs_nl, 'Three backslash broken before n')
diff --git a/test cases/failing/72 invalid escape char/meson.build b/test cases/failing/72 invalid escape char/meson.build
new file mode 100644
index 0000000..b4e9196
--- /dev/null
+++ b/test cases/failing/72 invalid escape char/meson.build
@@ -0,0 +1,4 @@
+# Make sure meson exits on an invalid string.
+# The string below contains a malformed \u escape sequence (non-hex digits).
+
+'my name is what \uxyzo who are you'