jim-json.c: Fix two issues.

1. Properly unescape string tokens (fixes #362).
2. Handle invalid JSON where a dictionary/object has a key but no value.
author: Steve Bennett <steveb@workware.net.au> 2025-12-29 13:11:30 +1000
committer: Steve Bennett <steveb@workware.net.au> 2025-12-30 04:59:08 +1000
commit: 7f688acf949f88e90a83f7e8049d65a98b6a27a0 (patch)
tree: 569a167ac752daf28eb181675a6a6abaf64fb400
parent: 912c167ed6dc5c2cb30bd57e20f10954503438e2 (diff)
download: jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.tar.gz
jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.tar.bz2
jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.zip
2 files changed, 59 insertions, 28 deletions
diff --git a/jim-json.c b/jim-json.c
index 923b54c..6d2a5f8 100644
--- a/jim-json.c
+++ b/jim-json.c
@@ -43,7 +43,7 @@ struct json_state {
 	Jim_Obj *nullObj;
 	const char *json;
 	jsmntok_t *tok;
-	int need_subst;
+	int retcode;
 	/* The following are used for -schema */
 	int enable_schema;
 	int enable_index;
@@ -217,17 +217,33 @@ json_decode_dump_value(Jim_Interp *interp, struct json_state *state, Jim_Obj *li
 {
 	const jsmntok_t *t = state->tok;
 
+	if (t->type == JSMN_UNDEFINED) {
+		/* Malformed JSON - just return */
+		state->retcode = JIM_ERR;
+		Jim_SetResultString(interp, "invalid JSON string", -1);
+		return;
+	}
+
 	if (t->type == JSMN_STRING || t->type == JSMN_PRIMITIVE) {
 		Jim_Obj	*elem;
 		int len = t->end - t->start;
 		const char *p = state->json + t->start;
+
+		int decr = 0;
+
 		int set_source = 1;
 		if (t->type == JSMN_STRING) {
-			/* Do we need to process backslash escapes? */
-			if (state->need_subst == 0 && memchr(p, '\\', len) != NULL) {
-				state->need_subst = 1;
-			}
 			elem = Jim_NewStringObj(interp, p, len);
+			if (memchr(p, '\\', len) != NULL) {
+				/* Need to process backslash escapes */
+				Jim_Obj *newelem;
+				Jim_IncrRefCount(elem);
+				Jim_SubstObj(interp, elem, &newelem, JIM_SUBST_FLAG | JIM_SUBST_NOCMD | JIM_SUBST_NOVAR);
+				Jim_IncrRefCount(newelem);
+				Jim_DecrRefCount(interp, elem);
+				decr = 1;
+				elem = newelem;
+			}
 		} else if (p[0] == 'n') {	/* null */
 			elem = state->nullObj;
 			set_source = 0;
@@ -244,6 +260,9 @@ json_decode_dump_value(Jim_Interp *interp, struct json_state *state, Jim_Obj *li
 		}
 
 		Jim_ListAppendElement(interp, list, elem);
+		if (decr) {
+			Jim_DecrRefCount(interp, elem);
+		}
 		state->tok++;
 	}
 	else {
@@ -299,6 +318,8 @@ static int parse_json_decode_options(Jim_Interp *interp, int argc, Jim_Obj *cons
  * Use jsmn to tokenise the JSON string 'json' of length 'len'
  *
  * Returns an allocated array of tokens or NULL on error (and sets an error result)
+ * Note that a sentinel value is added to the end of the array (type JSMN_UNDEFINED)
+ * so the caller can detect the end of the array in the case of malformed JSON.
  */
 static jsmntok_t *
 json_decode_tokenize(Jim_Interp *interp, const char *json, size_t len)
@@ -333,7 +354,7 @@ error:
 		return NULL;
 	}
 
-	t = Jim_Alloc(n * sizeof(*t));
+	t = Jim_Alloc((n + 1) * sizeof(*t));
 
 	jsmn_init(&parser);
 	n = jsmn_parse(&parser, json, len, t, n);
@@ -344,6 +365,8 @@ error:
 		Jim_Free(t);
 		goto error;
 	}
+	/* Add a sentinel value */
+	t[n].type = JSMN_UNDEFINED;
 
 	return t;
 }
@@ -385,44 +408,39 @@ json_decode(Jim_Interp *interp, int argc, Jim_Obj *const argv[])
 		goto done;
 	}
 	state.tok = tokens;
+	state.retcode = JIM_OK;
 	json_decode_schema_push(interp, &state);
 
 	list = json_decode_dump_container(interp, &state);
 	Jim_Free(tokens);
-	ret = JIM_OK;
 
 	/* Make sure the refcount doesn't go to 0 during Jim_SubstObj() */
 	Jim_IncrRefCount(list);
 
-	if (state.need_subst) {
-		/* Subsitute backslashes in the returned dictionary.
-		 * Need to be careful of refcounts.
-		 * Note that Jim_SubstObj() supports a few more escapes than
-		 * JSON requires, but should give the same result for all legal escapes.
-		 */
-		Jim_Obj *newList;
-		Jim_SubstObj(interp, list, &newList, JIM_SUBST_FLAG | JIM_SUBST_NOCMD | JIM_SUBST_NOVAR);
-		Jim_IncrRefCount(newList);
+	if (state.retcode == JIM_OK) {
+		if (state.schemaObj) {
+			Jim_Obj *resultObj = Jim_NewListObj(interp, NULL, 0);
+			Jim_ListAppendElement(interp, resultObj, list);
+			Jim_ListAppendElement(interp, resultObj, state.schemaObj);
+			Jim_SetResult(interp, resultObj);
+			Jim_DecrRefCount(interp, state.schemaObj);
+		}
+		else {
+			Jim_SetResult(interp, list);
+		}
 		Jim_DecrRefCount(interp, list);
-		list = newList;
-	}
-
-	if (state.schemaObj) {
-		Jim_Obj *resultObj = Jim_NewListObj(interp, NULL, 0);
-		Jim_ListAppendElement(interp, resultObj, list);
-		Jim_ListAppendElement(interp, resultObj, state.schemaObj);
-		Jim_SetResult(interp, resultObj);
-		Jim_DecrRefCount(interp, state.schemaObj);
 	}
 	else {
-		Jim_SetResult(interp, list);
+		if (state.schemaObj) {
+			Jim_DecrRefCount(interp, state.schemaObj);
+		}
+		Jim_DecrRefCount(interp, list);
 	}
-	Jim_DecrRefCount(interp, list);
 
 done:
 	Jim_DecrRefCount(interp, state.nullObj);
 
-	return ret;
+	return state.retcode;
 }
 
 int
diff --git a/tests/json.test b/tests/json.test
index 09c002c..ce6238c 100644
--- a/tests/json.test
+++ b/tests/json.test
@@ -135,6 +135,19 @@ test json-4.3 {source info preserved} -body {
 	info source [dict get [lindex [dict get [json::decode $json] payload timeline] 0] comment]
 } -result {data.json 17}
 
+# This test shows that the simple single-pass subst didn't work
+test json-5.1 {decode escapes} -body {
+	json::decode {{ "backslash":"This is a backslash: \\" }}
+} -result backslash\ This\\\ is\\\ a\\\ backslash:\\\ \\\\
+
+test json-5.2 {decode invalid dictionary} -body {
+    json::decode -schema {{ "key" }}
+} -returnCodes error -result {invalid JSON string}
+
+test json-5.3 {decode invalid nested dictionary} -body {
+    json::decode -schema {{"key":{ "subkey" }}}
+} -returnCodes error -result {invalid JSON string}
+
 unset -nocomplain json
 
 test json-encode-1.1 {String with backslashes}  {
author	Steve Bennett <steveb@workware.net.au>	2025-12-29 13:11:30 +1000
committer	Steve Bennett <steveb@workware.net.au>	2025-12-30 04:59:08 +1000
commit	7f688acf949f88e90a83f7e8049d65a98b6a27a0 (patch)
tree	569a167ac752daf28eb181675a6a6abaf64fb400
parent	912c167ed6dc5c2cb30bd57e20f10954503438e2 (diff)
download	jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.tar.gz jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.tar.bz2 jimtcl-7f688acf949f88e90a83f7e8049d65a98b6a27a0.zip