diff options
-rw-r--r-- | docs/markdown/Syntax.md | 3 | ||||
-rw-r--r-- | mesonbuild/mparser.py | 12 | ||||
-rw-r--r-- | test cases/common/185 escape and unicode/meson.build | 14 |
3 files changed, 23 insertions, 6 deletions
diff --git a/docs/markdown/Syntax.md b/docs/markdown/Syntax.md
index cf56dd3..aadb14a 100644
--- a/docs/markdown/Syntax.md
+++ b/docs/markdown/Syntax.md
@@ -116,6 +116,9 @@ The full list of escape sequences is:
 
 As in python and C, up to three octal digits are accepted in `\ooo`.
 
+Unrecognized escape sequences are left in the string unchanged, i.e., the
+backslash is left in the string.
+
 #### String concatenation
 
 Strings can be concatenated to form a new string using the `+` symbol.
diff --git a/mesonbuild/mparser.py b/mesonbuild/mparser.py
index 17783ce..4305a7c 100644
--- a/mesonbuild/mparser.py
+++ b/mesonbuild/mparser.py
@@ -21,12 +21,12 @@ from . import mlog
 # This is the regex for the supported escape sequences of a regular string
 # literal, like 'abc\x00'
 ESCAPE_SEQUENCE_SINGLE_RE = re.compile(r'''
-    ( \\U........      # 8-digit hex escapes
-    | \\u....          # 4-digit hex escapes
-    | \\x..            # 2-digit hex escapes
-    | \\[0-7]{1,3}     # Octal escapes
-    | \\N\{[^}]+\}     # Unicode characters by name
-    | \\[\\'abfnrtv]   # Single-character escapes
+    ( \\U[A-Fa-f0-9]{8}   # 8-digit hex escapes
+    | \\u[A-Fa-f0-9]{4}   # 4-digit hex escapes
+    | \\x[A-Fa-f0-9]{2}   # 2-digit hex escapes
+    | \\[0-7]{1,3}        # Octal escapes
+    | \\N\{[^}]+\}        # Unicode characters by name
+    | \\[\\'abfnrtv]      # Single-character escapes
     )''', re.UNICODE | re.VERBOSE)
 
 class MesonUnicodeDecodeError(MesonException):
diff --git a/test cases/common/185 escape and unicode/meson.build b/test cases/common/185 escape and unicode/meson.build
index 65377b6..e4fe628 100644
--- a/test cases/common/185 escape and unicode/meson.build
+++ b/test cases/common/185 escape and unicode/meson.build
@@ -22,3 +22,17 @@ foreach l : find_file_list.stdout().strip('\x00').split('\x00')
 endforeach
 
 test('second', executable('second', found_files_hex + [gen_file]))
+
+# Unrecognized and malformed escape sequences are literal
+
+malformed = [
+  [ '\c', 'c' ],
+  [ '\Uabcdefghi', 'Uabcdefghi'],
+  [ '\u123 ', 'u123 '],
+  [ '\xqr', 'xqr'],
+]
+
+foreach m : malformed
+  assert(m[0].endswith(m[1]), 'bad escape sequence had unexpected end')
+  assert(m[0].startswith('\\'), 'bad escape sequence had unexpected start')
+endforeach