From 325601b47b64b33cbe237508df2037e195795497 Mon Sep 17 00:00:00 2001
From: Anthony Liguori
Date: Wed, 1 Jun 2011 12:14:52 -0500
Subject: json-lexer: limit the maximum size of a given token

Signed-off-by: Michael Roth
Signed-off-by: Anthony Liguori
---
 json-lexer.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/json-lexer.c b/json-lexer.c
index 65c9720..fe5a060 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -18,6 +18,8 @@
 #include "qemu-common.h"
 #include "json-lexer.h"
 
+#define MAX_TOKEN_SIZE (64ULL << 20)
+
 /*
  * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
  * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
@@ -309,6 +311,17 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch)
         }
         lexer->state = new_state;
     } while (!char_consumed);
+
+    /* Do not let a single token grow to an arbitrarily large size,
+     * this is a security consideration.
+     */
+    if (lexer->token->length > MAX_TOKEN_SIZE) {
+        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
+        QDECREF(lexer->token);
+        lexer->token = qstring_new();
+        lexer->state = IN_START;
+    }
+
     return 0;
 }
 
--
cgit v1.1

From 529a0ef5f30e28a801d6527a3556adcaa4959669 Mon Sep 17 00:00:00 2001
From: Anthony Liguori
Date: Wed, 1 Jun 2011 12:14:56 -0500
Subject: json-lexer: reset the lexer state on an invalid token

Signed-off-by: Michael Roth
Signed-off-by: Anthony Liguori
---
 json-lexer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/json-lexer.c b/json-lexer.c
index fe5a060..a5bbe9e 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -305,6 +305,9 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch)
             new_state = IN_START;
             break;
         case IN_ERROR:
+            QDECREF(lexer->token);
+            lexer->token = qstring_new();
+            new_state = IN_START;
             return -EINVAL;
         default:
             break;
--
cgit v1.1

From bd3924a33a66c40065a8fa73b4d7a27aca3b0e04 Mon Sep 17 00:00:00 2001
From: Michael Roth
Date: Wed, 1 Jun 2011 12:14:57 -0500
Subject: json-lexer: fix flushing logic to not always go to error state

Currently we flush the lexer by passing in a NULL character. This
generally forces the lexer to go to the corresponding TERMINAL() state
for whatever token type it is currently parsing, emits the token to the
parser, then puts the lexer back into the IN_START state. However,
since a NULL character causes char_consumed to be 0, we always do a
second pass after this, which puts us in the IN_ERROR state.

Fix this behavior by adding a "flush" flag that tells the lexer not to
do more than one iteration.

Signed-off-by: Michael Roth
Signed-off-by: Anthony Liguori
---
 json-lexer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/json-lexer.c b/json-lexer.c
index a5bbe9e..6b49047 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -274,7 +274,7 @@ void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
     lexer->x = lexer->y = 0;
 }
 
-static int json_lexer_feed_char(JSONLexer *lexer, char ch)
+static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
 {
     int char_consumed, new_state;
 
@@ -313,7 +313,7 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch)
             break;
         }
         lexer->state = new_state;
-    } while (!char_consumed);
+    } while (!char_consumed && !flush);
 
     /* Do not let a single token grow to an arbitrarily large size,
      * this is a security consideration.
@@ -335,7 +335,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
     for (i = 0; i < size; i++) {
         int err;
 
-        err = json_lexer_feed_char(lexer, buffer[i]);
+        err = json_lexer_feed_char(lexer, buffer[i], false);
         if (err < 0) {
             return err;
         }
--
cgit v1.1
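The flushing problem described in the commit message above is easier to see in isolation. The following is a minimal toy sketch of the same feed/flush pattern, not the QEMU code itself; the names (toy_lexer, toy_feed_char, emit_token) and the digits-only grammar are purely illustrative. The NUL fed by the flush terminates whatever token is still pending, and the flush flag suppresses the second pass that would otherwise re-feed the unconsumed NUL and push the state machine into its error state.

#include <stdbool.h>
#include <stdio.h>

enum toy_state { IN_START, IN_NUMBER, IN_ERROR };

struct toy_lexer {
    enum toy_state state;
    char buf[32];               /* large enough for the demo tokens */
    int len;
};

static void emit_token(struct toy_lexer *lx)
{
    lx->buf[lx->len] = '\0';
    printf("token: %s\n", lx->buf);
    lx->len = 0;
}

/* Feed one character; re-run the state machine on an unconsumed character,
 * but never more than once when this is the final flush pass. */
static void toy_feed_char(struct toy_lexer *lx, char ch, bool flush)
{
    bool consumed;

    do {
        consumed = false;
        switch (lx->state) {
        case IN_START:
            if (ch >= '0' && ch <= '9') {
                lx->buf[lx->len++] = ch;
                lx->state = IN_NUMBER;
                consumed = true;
            } else if (ch == ' ') {
                consumed = true;            /* skip separators */
            } else {
                lx->state = IN_ERROR;       /* e.g. the NUL fed on flush */
            }
            break;
        case IN_NUMBER:
            if (ch >= '0' && ch <= '9') {
                lx->buf[lx->len++] = ch;
                consumed = true;
            } else {
                emit_token(lx);             /* terminal state: token done */
                lx->state = IN_START;       /* ch deliberately not consumed */
            }
            break;
        case IN_ERROR:
            printf("lexical error\n");
            lx->state = IN_START;
            consumed = true;
            break;
        }
        /* Without "&& !flush", the NUL used for flushing would be re-fed in
         * IN_START and end up in IN_ERROR every time, which is the behavior
         * the patch above fixes. */
    } while (!consumed && !flush);
}

int main(void)
{
    struct toy_lexer lx = { IN_START, { 0 }, 0 };
    const char *input = "123 45";

    for (const char *p = input; *p; p++) {
        toy_feed_char(&lx, *p, false);
    }
    if (lx.state != IN_START) {
        toy_feed_char(&lx, 0, true);        /* flush the pending "45" token */
    }
    return 0;
}

Running this prints "token: 123" and "token: 45"; dropping the "&& !flush" condition makes every flush also report a spurious lexical error.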
From b011f61931f0113b29b7cd7e921dd022e0b04834 Mon Sep 17 00:00:00 2001
From: Michael Roth
Date: Wed, 1 Jun 2011 12:14:58 -0500
Subject: json-lexer: make lexer error-recovery more deterministic

Currently when we reach an error state we effectively flush everything
fed to the lexer, which can put us in a state where we keep feeding
tokens into the parser at arbitrary offsets in the stream. This makes
it difficult for the lexer/tokenizer/parser to get back in sync when
bad input is made by the client.

With these changes we emit an error state/token up to the tokenizer as
soon as we reach an error state, and continue processing any data
passed in rather than bailing out. The reset token will be used to
reset the tokenizer and parser, such that they'll recover state as
soon as the lexer begins generating valid token sequences again.

We also map chr(192,193,245-255) to an error state here, since they
are invalid UTF-8 characters. QMP guest proxy/agent will use chr(255)
to force a flush/reset of previous input for reliable delivery of
certain events, so also we document that thoroughly here.

Signed-off-by: Michael Roth
Signed-off-by: Anthony Liguori
---
 json-lexer.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/json-lexer.c b/json-lexer.c
index 6b49047..c21338f 100644
--- a/json-lexer.c
+++ b/json-lexer.c
@@ -105,7 +105,8 @@ static const uint8_t json_lexer[][256] = {
         ['u'] = IN_DQ_UCODE0,
     },
     [IN_DQ_STRING] = {
-        [1 ... 0xFF] = IN_DQ_STRING,
+        [1 ... 0xBF] = IN_DQ_STRING,
+        [0xC2 ... 0xF4] = IN_DQ_STRING,
         ['\\'] = IN_DQ_STRING_ESCAPE,
         ['"'] = JSON_STRING,
     },
@@ -144,7 +145,8 @@ static const uint8_t json_lexer[][256] = {
         ['u'] = IN_SQ_UCODE0,
     },
     [IN_SQ_STRING] = {
-        [1 ... 0xFF] = IN_SQ_STRING,
+        [1 ... 0xBF] = IN_SQ_STRING,
+        [0xC2 ... 0xF4] = IN_SQ_STRING,
         ['\\'] = IN_SQ_STRING_ESCAPE,
         ['\''] = JSON_STRING,
     },
@@ -305,10 +307,25 @@ static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
             new_state = IN_START;
             break;
         case IN_ERROR:
+            /* XXX: To avoid having previous bad input leaving the parser in an
+             * unresponsive state where we consume unpredictable amounts of
+             * subsequent "good" input, percolate this error state up to the
+             * tokenizer/parser by forcing a NULL object to be emitted, then
+             * reset state.
+             *
+             * Also note that this handling is required for reliable channel
+             * negotiation between QMP and the guest agent, since chr(0xFF)
+             * is placed at the beginning of certain events to ensure proper
+             * delivery when the channel is in an unknown state. chr(0xFF) is
+             * never a valid ASCII/UTF-8 sequence, so this should reliably
+             * induce an error/flush state.
+             */
+            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
             QDECREF(lexer->token);
             lexer->token = qstring_new();
             new_state = IN_START;
-            return -EINVAL;
+            lexer->state = new_state;
+            return 0;
         default:
             break;
         }
@@ -346,7 +363,7 @@ int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
 
 int json_lexer_flush(JSONLexer *lexer)
 {
-    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0);
+    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
 }
 
 void json_lexer_destroy(JSONLexer *lexer)
--
cgit v1.1
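As a rough illustration of the chr(0xFF) delivery trick described in the last commit message, the sketch below shows the sending side: a writer that cannot know what state the peer's JSON parser is in prefixes its message with a 0xFF byte. Because 0xFF never occurs in valid ASCII or UTF-8, the receiving lexer is forced through IN_ERROR, emits a JSON_ERROR token, and resets to IN_START before the real message begins. The helper name send_with_parser_reset and the use of a raw file descriptor are assumptions for the example; this is not the actual QEMU or guest-agent transport code.

#include <string.h>
#include <unistd.h>

/* Prefix a JSON message with a 0xFF sentinel byte to force the peer's
 * lexer out of whatever half-parsed state earlier input left it in. */
static int send_with_parser_reset(int fd, const char *json_msg)
{
    const unsigned char sentinel = 0xFF;    /* never valid ASCII/UTF-8 */

    if (write(fd, &sentinel, 1) != 1) {
        return -1;
    }
    if (write(fd, json_msg, strlen(json_msg)) < 0) {
        return -1;
    }
    return 0;
}

int main(void)
{
    /* For demonstration, "send" to stdout; a real caller would pass the
     * file descriptor of its QMP or agent channel. */
    return send_with_parser_reset(STDOUT_FILENO,
                                  "{\"hello\": \"world\"}\n") ? 1 : 0;
}

Splitting the sentinel and the payload into two write(2) calls keeps the sketch short; a production client would additionally handle short writes and EINTR.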