diff options
author | Markus Armbruster <armbru@redhat.com> | 2018-08-23 18:39:52 +0200 |
---|---|---|
committer | Markus Armbruster <armbru@redhat.com> | 2018-08-24 20:26:37 +0200 |
commit | 4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd (patch) | |
tree | 5841258a3a8fd1fff3aeb2f70f3c68e308c68b8b | |
parent | de930f45cb56ccf7535cbacee3f3686d416f5283 (diff) | |
download | qemu-4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd.zip qemu-4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd.tar.gz qemu-4b1c0cd7c7f9f9cf2e46c0a9c9cd88b2cba3decd.tar.bz2 |
json: Accept overlong \xC0\x80 as U+0000 ("modified UTF-8")
Since the JSON grammar doesn't accept U+0000 anywhere, this merely
exchanges one kind of parse error for another. It's purely for
consistency with qobject_to_json(), which accepts \xC0\x80 (see commit
e2ec3f97680).
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180823164025.12553-26-armbru@redhat.com>
-rw-r--r-- | qobject/json-lexer.c | 2 | ||||
-rw-r--r-- | qobject/json-parser.c | 2 | ||||
-rw-r--r-- | tests/check-qjson.c | 8 |
3 files changed, 3 insertions, 9 deletions
diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c index 93fa273..4c402f6 100644 --- a/qobject/json-lexer.c +++ b/qobject/json-lexer.c @@ -93,7 +93,7 @@ * interpolation = %((l|ll|I64)[du]|[ipsf]) * * Note: - * - Input must be encoded in UTF-8. + * - Input must be encoded in modified UTF-8. * - Decoding and validating is left to the parser. */ diff --git a/qobject/json-parser.c b/qobject/json-parser.c index b779316..a9b227f 100644 --- a/qobject/json-parser.c +++ b/qobject/json-parser.c @@ -200,7 +200,7 @@ static QString *qstring_from_escaped_str(JSONParserContext *ctxt, } } else { cp = mod_utf8_codepoint(ptr, 6, &end); - if (cp <= 0) { + if (cp < 0) { parse_error(ctxt, token, "invalid UTF-8 sequence in string"); goto out; } diff --git a/tests/check-qjson.c b/tests/check-qjson.c index 71c77d2..3abf12b 100644 --- a/tests/check-qjson.c +++ b/tests/check-qjson.c @@ -152,12 +152,6 @@ static void string_with_quotes(void) static void utf8_string(void) { /* - * Problem: we can't easily deal with embedded U+0000. Parsing - * the JSON string "this \\u0000" is fun" yields "this \0 is fun", - * which gets misinterpreted as NUL-terminated "this ". We should - * consider using overlong encoding \xC0\x80 for U+0000 ("modified - * UTF-8"). - * * Most test cases are scraped from Markus Kuhn's UTF-8 decoder * capability and stress test at * http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt @@ -586,7 +580,7 @@ static void utf8_string(void) { /* \U+0000 */ "\xC0\x80", - NULL, + "\xC0\x80", "\\u0000", }, { |