17 files changed, 178 insertions, 27 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md
index 1005100..01c8c6e 100644
--- a/docs/markdown/Syntax.md
+++ b/docs/markdown/Syntax.md
@@ -90,8 +90,24 @@ single quote do it like this:
 single quote = 'contains a \' character'
 ```
 
-Similarly `\n` gets converted to a newline and `\\` to a single
-backslash.
+The full list of escape sequences is:
+
+* `\\` Backslash
+* `\'` Single quote
+* `\a` Bell
+* `\b` Backspace
+* `\f` Formfeed
+* `\n` Newline
+* `\r` Carriage Return
+* `\t` Horizontal Tab
+* `\v` Vertical Tab
+* `\ooo` Character with octal value ooo
+* `\xhh` Character with hex value hh
+* `\uxxxx` Character with 16-bit hex value xxxx
+* `\Uxxxxxxxx` Character with 32-bit hex value xxxxxxxx
+* `\N{name}` Character named name in Unicode database
+
+As in Python and C, up to three octal digits are accepted in `\ooo`.
 
 #### String concatenation
 
diff --git a/docs/markdown/snippets/more-escape-sequences.md b/docs/markdown/snippets/more-escape-sequences.md
new file mode 100644
index 0000000..2894079
--- /dev/null
+++ b/docs/markdown/snippets/more-escape-sequences.md
@@ -0,0 +1,17 @@
+## String escape character update
+
+Strings (both single-quoted and triple-quoted) in Meson have been taught the
+same set of escape sequences as in Python. It is therefore now possible to use
+arbitrary bytes in strings, for example NUL (`\0`) and other ASCII control
+characters. See the chapter about *Strings* in *Syntax* for more details.
+
+Potential backwards compatibility issue: Any valid escape sequence according to
+the new rules will be interpreted as an escape sequence instead of the literal
+characters. Previously only single-quoted strings supported escape sequences and
+the supported sequences were `\'`, `\\` and `\n`.
+
+The most likely breakage is usage of backslash-n in triple-quoted strings. It
+is now written in the same way as in single-quoted strings: `\\n` instead of
+`\n`. In general it is now recommended to escape any usage of backslash.
+However, backslash-c (`\c`), for example, is still backslash-c because it isn't
+a valid escape sequence.
diff --git a/mesonbuild/backend/ninjabackend.py b/mesonbuild/backend/ninjabackend.py
index 7bdd2d1..bc3a8ef 100644
--- a/mesonbuild/backend/ninjabackend.py
+++ b/mesonbuild/backend/ninjabackend.py
@@ -169,7 +169,7 @@ class NinjaBackend(backends.Backend):
                 break
         else:
             # None of our compilers are MSVC, we're done.
-            return open(tempfilename, 'a')
+            return open(tempfilename, 'a', encoding='utf-8')
         filename = os.path.join(self.environment.get_scratch_dir(),
                                 'incdetect.c')
         with open(filename, 'w') as f:
@@ -196,7 +196,7 @@ int dummy;
             if match:
                 with open(tempfilename, 'ab') as binfile:
                     binfile.write(b'msvc_deps_prefix = ' + match.group(1) + b'\n')
-                return open(tempfilename, 'a')
+                return open(tempfilename, 'a', encoding='utf-8')
         raise MesonException('Could not determine vs dep dependency prefix string.')
 
     def generate(self, interp):
@@ -206,7 +206,7 @@ int dummy;
             raise MesonException('Could not detect Ninja v1.5 or newer')
         outfilename = os.path.join(self.environment.get_build_dir(), self.ninja_filename)
         tempfilename = outfilename + '~'
-        with open(tempfilename, 'w') as outfile:
+        with open(tempfilename, 'w', encoding='utf-8') as outfile:
             outfile.write('# This is the build file for project "%s"\n' %
                           self.build.get_project())
             outfile.write('# It is autogenerated by the Meson build system.\n')
diff --git a/mesonbuild/backend/vs2010backend.py b/mesonbuild/backend/vs2010backend.py
index 4959e6e..22383dc 100644
--- a/mesonbuild/backend/vs2010backend.py
+++ b/mesonbuild/backend/vs2010backend.py
@@ -227,7 +227,7 @@ class Vs2010Backend(backends.Backend):
 
     def generate_solution(self, sln_filename, projlist):
         default_projlist = self.get_build_by_default_targets()
-        with open(sln_filename, 'w') as ofile:
+        with open(sln_filename, 'w', encoding='utf-8') as ofile:
             ofile.write('Microsoft Visual Studio Solution File, Format '
                         'Version 11.00\n')
             ofile.write('# Visual Studio ' + self.vs_version + '\n')
@@ -575,7 +575,7 @@ class Vs2010Backend(backends.Backend):
         tree.write(ofname, encoding='utf-8', xml_declaration=True)
         # ElementTree can not do prettyprinting so do it manually
         doc = xml.dom.minidom.parse(ofname)
-        with open(ofname, 'w') as of:
+        with open(ofname, 'w', encoding='utf-8') as of:
             of.write(doc.toprettyxml())
 
     def gen_vcxproj(self, target, ofname, guid):
@@ -1128,7 +1128,7 @@ if %%errorlevel%% neq 0 goto :VCEnd'''
         igroup = ET.SubElement(root, 'ItemGroup')
         rulefile = os.path.join(self.environment.get_scratch_dir(), 'regen.rule')
         if not os.path.exists(rulefile):
-            with open(rulefile, 'w') as f:
+            with open(rulefile, 'w', encoding='utf-8') as f:
                 f.write("# Meson regen file.")
         custombuild = ET.SubElement(igroup, 'CustomBuild', Include=rulefile)
         message = ET.SubElement(custombuild, 'Message')
diff --git a/mesonbuild/compilers/c.py b/mesonbuild/compilers/c.py
index 279e435..0e474e7 100644
--- a/mesonbuild/compilers/c.py
+++ b/mesonbuild/compilers/c.py
@@ -842,7 +842,12 @@ class CCompiler(Compiler):
         return ['-pthread']
 
     def has_multi_arguments(self, args, env):
-        for arg in args:
+        for arg in args[:]:
+            # some compilers, e.g. GCC, don't warn for unsupported warning-disable
+            # flags, so when we are testing a flag like "-Wno-forgotten-towel", also
+            # check the equivalent enable flag too "-Wforgotten-towel"
+            if arg.startswith('-Wno-'):
+                    args.append('-W' + arg[5:])
             if arg.startswith('-Wl,'):
                 mlog.warning('''{} looks like a linker argument, but has_argument
 and other similar methods only support checking compiler arguments.
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 0e7524c..9e43065 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -13,9 +13,44 @@
 # limitations under the License.
 
 import re
+import codecs
 from .mesonlib import MesonException
 from . import mlog
 
+# This is the regex for the supported escape sequences of a regular string
+# literal, like 'abc\x00'
+ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\'abfnrtv]   # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+# This is the regex for the supported escape sequences of a multiline string
+# literal, like '''abc\x00'''. The only difference is that single quote (')
+# doesn't require escaping.
+ESCAPE_SEQUENCE_MULTI_RE = re.compile(r'''
+    ( \\U........      # 8-digit hex escapes
+    | \\u....          # 4-digit hex escapes
+    | \\x..            # 2-digit hex escapes
+    | \\[0-7]{1,3}     # Octal escapes
+    | \\N\{[^}]+\}     # Unicode characters by name
+    | \\[\\abfnrtv]    # Single-character escapes
+    )''', re.UNICODE | re.VERBOSE)
+
+class MesonUnicodeDecodeError(MesonException):
+    def __init__(self, match):
+        super().__init__("%s" % match)
+        self.match = match
+
+def decode_match(match):  # re.sub() callback: decode one matched escape sequence
+    try:
+        return codecs.decode(match.group(0), 'unicode_escape')
+    except UnicodeDecodeError as err:
+        raise MesonUnicodeDecodeError(match.group(0)) from err
+
 class ParseException(MesonException):
     def __init__(self, text, line, lineno, colno):
         # Format as error message, followed by the line with the error, followed by a caret to show the error column.
@@ -112,7 +147,6 @@ class Lexer:
         par_count = 0
         bracket_count = 0
         col = 0
-        newline_rx = re.compile(r'(?<!\\)((?:\\\\)*)\\n')
         while loc < len(self.code):
             matched = False
             value = None
@@ -145,12 +179,18 @@ class Lexer:
                         if match_text.find("\n") != -1:
                             mlog.warning("""Newline character in a string detected, use ''' (three single quotes) for multiline strings instead.
 This will become a hard error in a future Meson release.""", self.getline(line_start), lineno, col)
-                        value = match_text[1:-1].replace(r"\'", "'")
-                        value = newline_rx.sub(r'\1\n', value)
-                        value = value.replace(r" \\ ".strip(), r" \ ".strip())
+                        value = match_text[1:-1]
+                        try:
+                            value = ESCAPE_SEQUENCE_SINGLE_RE.sub(decode_match, value)
+                        except MesonUnicodeDecodeError as err:
+                            raise MesonException("Failed to parse escape sequence: '{}' in string:\n  {}".format(err.match, match_text))
                     elif tid == 'multiline_string':
                         tid = 'string'
                         value = match_text[3:-3]
+                        try:
+                            value = ESCAPE_SEQUENCE_MULTI_RE.sub(decode_match, value)
+                        except MesonUnicodeDecodeError as err:
+                            raise MesonException("Failed to parse escape sequence: '{}' in string:\n{}".format(err.match, match_text))
                         lines = match_text.split('\n')
                         if len(lines) > 1:
                             lineno += len(lines) - 1
diff --git a/test cases/common/112 has arg/meson.build b/test cases/common/112 has arg/meson.build
index 27290a1..ba07311 100644
--- a/test cases/common/112 has arg/meson.build
+++ b/test cases/common/112 has arg/meson.build
@@ -39,11 +39,17 @@ assert(l2.length() == 0, 'First supported did not return empty array.')
 
 if cc.get_id() == 'gcc'
   pre_arg = '-Wformat'
-  anti_pre_arg = '-Wno-format'
+  # NOTE: We have special handling for -Wno-foo args because gcc silently
+  # ignores unknown -Wno-foo args unless you pass -Werror, so for this test, we
+  # pass it as two separate arguments.
+  anti_pre_arg = ['-W', 'no-format']
   arg = '-Werror=format-security'
   assert(not cc.has_multi_arguments([anti_pre_arg, arg]), 'Arg that should be broken is not.')
   assert(cc.has_multi_arguments(pre_arg), 'Arg that should have worked does not work.')
   assert(cc.has_multi_arguments([pre_arg, arg]), 'Arg that should have worked does not work.')
+  # Test that gcc correctly errors out on unknown -Wno flags
+  assert(not cc.has_argument('-Wno-lol-meson-test-flags'), 'should error out on unknown -Wno args')
+  assert(not cc.has_multi_arguments(['-Wno-pragmas', '-Wno-lol-meson-test-flags']), 'should error out even if some -Wno args are valid')
 endif
 
 if cc.get_id() == 'clang' and cc.version().version_compare('<=4.0.0')
diff --git a/test cases/common/190 escape and unicode/file.c.in b/test cases/common/190 escape and unicode/file.c.in
new file mode 100644
index 0000000..413ed42
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.c.in
@@ -0,0 +1,5 @@
+#include<stdio.h>
+const char* does_it_work() {
+    printf("{NAME}\n");
+    return "yes it does";
+}
diff --git a/test cases/common/190 escape and unicode/file.py b/test cases/common/190 escape and unicode/file.py
new file mode 100644
index 0000000..af67a09
--- /dev/null
+++ b/test cases/common/190 escape and unicode/file.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+
+# Usage: file.py INPUT NAME OUTPUT -- copy INPUT to OUTPUT, replacing "{NAME}".
+import sys
+
+with open(sys.argv[1], encoding='utf-8') as fh:
+    content = fh.read().replace("{NAME}", sys.argv[2])
+# NAME may be non-ASCII, so do not rely on the locale's default encoding.
+with open(sys.argv[3], 'w', encoding='utf-8') as fh:
+    fh.write(content)
diff --git a/test cases/common/190 escape and unicode/find.py b/test cases/common/190 escape and unicode/find.py
new file mode 100644
index 0000000..34a3eb8
--- /dev/null
+++ b/test cases/common/190 escape and unicode/find.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+
+for fh in os.listdir('.'):
+    if os.path.isfile(fh):
+        if fh.endswith('.c'):
+            sys.stdout.write(fh + '\0')
diff --git a/test cases/common/190 escape and unicode/fun.c b/test cases/common/190 escape and unicode/fun.c
new file mode 100644
index 0000000..8eeb8ea
--- /dev/null
+++ b/test cases/common/190 escape and unicode/fun.c
@@ -0,0 +1,3 @@
+int a_fun() {
+    return 1;
+}
diff --git a/test cases/common/190 escape and unicode/main.c b/test cases/common/190 escape and unicode/main.c
new file mode 100644
index 0000000..0bcde16
--- /dev/null
+++ b/test cases/common/190 escape and unicode/main.c
@@ -0,0 +1,12 @@
+#include <string.h>
+
+const char* does_it_work();
+
+int a_fun();
+
+int main() {
+    if(strcmp(does_it_work(), "yes it does") != 0) {
+        return -a_fun();
+    }
+    return 0;
+}
diff --git a/test cases/common/190 escape and unicode/meson.build b/test cases/common/190 escape and unicode/meson.build
new file mode 100644
index 0000000..65377b6
--- /dev/null
+++ b/test cases/common/190 escape and unicode/meson.build
@@ -0,0 +1,24 @@
+project('180 escape', 'c')
+
+gen = generator(find_program('file.py'), arguments:['@INPUT@', 'erd\u0151', '@OUTPUT@'], output: '@BASENAME@')
+
+gen_file = gen.process('file.c.in')
+
+find_file_list = run_command(find_program('find.py'))
+assert(find_file_list.returncode() == 0, 'Didn\'t find any files.')
+
+# Strings should support both octal \ooo and hex \xhh encodings
+
+found_files_oct = []
+foreach l : find_file_list.stdout().strip('\0').split('\000')
+  found_files_oct += [files(l)]
+endforeach
+
+test('first', executable('first', found_files_oct + [gen_file]))
+
+found_files_hex = []
+foreach l : find_file_list.stdout().strip('\x00').split('\x00')
+  found_files_hex += [files(l)]
+endforeach
+
+test('second', executable('second', found_files_hex + [gen_file]))
diff --git a/test cases/common/33 try compile/meson.build b/test cases/common/33 try compile/meson.build
index 09ca395..cb1037d 100644
--- a/test cases/common/33 try compile/meson.build
+++ b/test cases/common/33 try compile/meson.build
@@ -1,11 +1,11 @@
 project('try compile', 'c', 'cpp')
 
 code = '''#include<stdio.h>
-void func() { printf("Something.\n"); }
+void func() { printf("Something.\\n"); }
 '''
 
 breakcode = '''#include<nonexisting.h>
-void func() { printf("This won't work.\n"); }
+void func() { printf("This won't work.\\n"); }
 '''
 
 foreach compiler : [meson.get_compiler('c'), meson.get_compiler('cpp')]
diff --git a/test cases/common/39 tryrun/meson.build b/test cases/common/39 tryrun/meson.build
index c64446f..daf5be7 100644
--- a/test cases/common/39 tryrun/meson.build
+++ b/test cases/common/39 tryrun/meson.build
@@ -13,8 +13,8 @@ endif
 
 ok_code = '''#include<stdio.h>
 int main(int argc, char **argv) {
-  printf("%s\n", "stdout");
-  fprintf(stderr, "%s\n", "stderr");
+  printf("%s\\n", "stdout");
+  fprintf(stderr, "%s\\n", "stderr");
   return 0;
 }
 '''
diff --git a/test cases/common/42 string operations/meson.build b/test cases/common/42 string operations/meson.build
index a43de70..1c289eb 100644
--- a/test cases/common/42 string operations/meson.build
+++ b/test cases/common/42 string operations/meson.build
@@ -77,21 +77,21 @@ assert('"1.1.20"'.strip('"') == '1.1.20', '" badly stripped')
 assert('"1.1.20"'.strip('".') == '1.1.20', '". badly stripped')
 assert('"1.1.20"   '.strip('" ') == '1.1.20', '". badly stripped')
 
-bs_b = '''\b'''
-bs_bs_b = '''\\b'''
+bs_c = '''\c'''
+bs_bs_c = '''\\\c'''
 nl = '''
 '''
-bs_n = '''\n'''
+bs_n = '''\\n'''
 bs_nl = '''\
 '''
-bs_bs_n = '''\\n'''
-bs_bs_nl = '''\\
+bs_bs_n = '''\\\\n'''
+bs_bs_nl = '''\\\\
 '''
 
-assert('\b' == bs_b, 'Single backslash broken')
-assert('\\b' == bs_b, 'Double backslash broken')
-assert('\\\b' == bs_bs_b, 'Three backslash broken')
-assert('\\\\b' == bs_bs_b, 'Four backslash broken')
+assert('\c' == bs_c, 'Single backslash broken')
+assert('\\c' == bs_c, 'Double backslash broken')
+assert('\\\c' == bs_bs_c, 'Three backslash broken')
+assert('\\\\c' == bs_bs_c, 'Four backslash broken')
 assert('\n' == nl, 'Newline escape broken')
 assert('\\n' == bs_n, 'Double backslash broken before n')
 assert('\\\n' == bs_nl, 'Three backslash broken before n')
diff --git a/test cases/failing/72 invalid escape char/meson.build b/test cases/failing/72 invalid escape char/meson.build
new file mode 100644
index 0000000..b4e9196
--- /dev/null
+++ b/test cases/failing/72 invalid escape char/meson.build
@@ -0,0 +1,4 @@
+# Make sure meson exits on invalid string
+# The string below contains a malformed \u escape (non-hexadecimal digits)
+
+'my name is what \uxyzo who are you'