diff options
author | Chip Salzenberg <chip@pobox.com> | 2012-04-03 18:00:29 -0700 |
---|---|---|
committer | Chip Salzenberg <chip@pobox.com> | 2013-09-27 17:32:06 -0700 |
commit | 9c259c07aa53381df5819ef61627342c932d626f (patch) | |
tree | c13313ce1ae36aeaea0a441c8335bee67cc689a5 /src/load.c | |
parent | e4d6a9f6f4f90aa7bb1b5e09d146ac8d2cb3cd1d (diff) | |
download | jansson-9c259c07aa53381df5819ef61627342c932d626f.zip jansson-9c259c07aa53381df5819ef61627342c932d626f.tar.gz jansson-9c259c07aa53381df5819ef61627342c932d626f.tar.bz2 |
Support \u0000 - add size_t string lengths to API, load and dump \u000, etc.
Also:
Steal strings during parsing for half the mallocs!
Change all input-caused assertions to errors. No crashes please, we're programmers.
Diffstat (limited to 'src/load.c')
-rw-r--r-- | src/load.c | 80 |
1 files changed, 49 insertions, 31 deletions
@@ -63,7 +63,10 @@ typedef struct { strbuffer_t saved_text; int token; union { - char *string; + struct { + char *val; + size_t len; + } string; json_int_t integer; double real; } value; @@ -279,6 +282,13 @@ static void lex_save_cached(lex_t *lex) } } +static void lex_free_string(lex_t *lex) +{ + jsonp_free(lex->value.string.val); + lex->value.string.val = NULL; + lex->value.string.len = 0; +} + /* assumes that str points to 'u' plus at least 4 valid hex digits */ static int32_t decode_unicode_escape(const char *str) { @@ -297,7 +307,7 @@ static int32_t decode_unicode_escape(const char *str) else if(l_isupper(c)) value += c - 'A' + 10; else - assert(0); + return -1; } return value; @@ -310,7 +320,7 @@ static void lex_scan_string(lex_t *lex, json_error_t *error) char *t; int i; - lex->value.string = NULL; + lex->value.string.val = NULL; lex->token = TOKEN_INVALID; c = lex_get_save(lex, error); @@ -365,14 +375,12 @@ static void lex_scan_string(lex_t *lex, json_error_t *error) - two \uXXXX escapes (length 12) forming an UTF-16 surrogate pair are converted to 4 bytes */ - lex->value.string = jsonp_malloc(lex->saved_text.length + 1); - if(!lex->value.string) { + t = jsonp_malloc(lex->saved_text.length + 1); + if(!t) { /* this is not very nice, since TOKEN_INVALID is returned */ goto out; } - - /* the target */ - t = lex->value.string; + lex->value.string.val = t; /* + 1 to skip the " */ p = strbuffer_value(&lex->saved_text) + 1; @@ -381,17 +389,24 @@ static void lex_scan_string(lex_t *lex, json_error_t *error) if(*p == '\\') { p++; if(*p == 'u') { - char buffer[4]; - int length; + size_t length; int32_t value; value = decode_unicode_escape(p); + if(value < 0) { + error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1); + goto out; + } p += 5; if(0xD800 <= value && value <= 0xDBFF) { /* surrogate pair */ if(*p == '\\' && *(p + 1) == 'u') { int32_t value2 = decode_unicode_escape(++p); + if(value2 < 0) { + error_set(error, lex, "invalid Unicode escape '%.6s'", p - 1); + goto out; + } p += 5; if(0xDC00 <= value2 && value2 <= 0xDFFF) { @@ -420,16 +435,9 @@ static void lex_scan_string(lex_t *lex, json_error_t *error) error_set(error, lex, "invalid Unicode '\\u%04X'", value); goto out; } - else if(value == 0) - { - error_set(error, lex, "\\u0000 is not allowed"); - goto out; - } - if(utf8_encode(value, buffer, &length)) + if(utf8_encode(value, t, &length)) assert(0); - - memcpy(t, buffer, length); t += length; } else { @@ -451,11 +459,12 @@ static void lex_scan_string(lex_t *lex, json_error_t *error) *(t++) = *(p++); } *t = '\0'; + lex->value.string.len = t - lex->value.string.val; lex->token = TOKEN_STRING; return; out: - jsonp_free(lex->value.string); + lex_free_string(lex); } #ifndef JANSSON_USING_CMAKE /* disabled if using cmake */ @@ -571,10 +580,8 @@ static int lex_scan(lex_t *lex, json_error_t *error) strbuffer_clear(&lex->saved_text); - if(lex->token == TOKEN_STRING) { - jsonp_free(lex->value.string); - lex->value.string = NULL; - } + if(lex->token == TOKEN_STRING) + lex_free_string(lex); c = lex_get(lex, error); while(c == ' ' || c == '\t' || c == '\n' || c == '\r') @@ -635,13 +642,14 @@ out: return lex->token; } -static char *lex_steal_string(lex_t *lex) +static char *lex_steal_string(lex_t *lex, size_t *out_len) { char *result = NULL; - if(lex->token == TOKEN_STRING) - { - result = lex->value.string; - lex->value.string = NULL; + if(lex->token == TOKEN_STRING) { + result = lex->value.string.val; + *out_len = lex->value.string.len; + lex->value.string.val = NULL; + lex->value.string.len = 0; } return result; } @@ -659,7 +667,7 @@ static int lex_init(lex_t *lex, get_func get, void *data) static void lex_close(lex_t *lex) { if(lex->token == TOKEN_STRING) - jsonp_free(lex->value.string); + lex_free_string(lex); strbuffer_close(&lex->saved_text); } @@ -680,6 +688,7 @@ static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) while(1) { char *key; + size_t len; json_t *value; if(lex->token != TOKEN_STRING) { @@ -687,9 +696,14 @@ static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) goto error; } - key = lex_steal_string(lex); + key = lex_steal_string(lex, &len); if(!key) return NULL; + if (memchr(key, len, '\0')) { + jsonp_free(key); + error_set(error, lex, "nul char in object key not supported"); + goto error; + } if(flags & JSON_REJECT_DUPLICATES) { if(json_object_get(object, key)) { @@ -788,7 +802,11 @@ static json_t *parse_value(lex_t *lex, size_t flags, json_error_t *error) switch(lex->token) { case TOKEN_STRING: { - json = json_string_nocheck(lex->value.string); + json = jsonp_stringn_nocheck_own(lex->value.string.val, lex->value.string.len); + if(json) { + lex->value.string.val = NULL; + lex->value.string.len = 0; + } break; } |