json: Add json encoder/decoder

Using the jsmn library for decoding. Based on the original implementation by Svyatoslav Mishyn <juef@openmailbox.org> Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2019-11-04 08:41:32 +1000
committer: Steve Bennett <steveb@workware.net.au> 2019-11-09 19:59:15 +1000
commit: dd064e670daf910fa50e138ec0c36822405b60f5 (patch)
tree: c512150b006c3e1dbc7f4575e2d744133caa0b59 /jim-json.c
parent: 529c84b4ee31f51925a9ac14247a94a428592c7d (diff)
download: jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.zip
jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.tar.gz
jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.tar.bz2
1 files changed, 414 insertions, 0 deletions
diff --git a/jim-json.c b/jim-json.c
new file mode 100644
index 0000000..c75ac74
--- /dev/null
+++ b/jim-json.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2015 - 2016 Svyatoslav Mishyn <juef@openmailbox.org>
+ * Copyright (c) 2019 Steve Bennett <steveb@workware.net.au>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for
+ * any purpose with or without fee is hereby granted, provided that the
+ * above copyright notice and this permission notice appear in all
+ * copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
+ * AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
+ * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+ * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <jim.h>
+
+#include "jsmn/jsmn.h"
+
+/* Schema types reported by -schema. The values index schema_names[] and schemaTypeObj[], so order matters */
+typedef enum {
+	JSON_BOOL, /* "bool": a primitive starting with 't' or 'f' */
+	JSON_OBJ, /* "obj": a JSON object */
+	JSON_LIST, /* "list": an array described by a single element type */
+	JSON_MIXED, /* "mixed": an array described element by element */
+	JSON_STR, /* "str": a JSON string */
+	JSON_NUM, /* "num": any other primitive */
+	JSON_MAX_TYPE, /* count of the types above; sizes schemaTypeObj[] */
+} json_schema_t;
+
+struct json_state { /* decoder state shared by the json_decode_* functions */
+	Jim_Obj *nullObj; /* value stored for JSON null; "null" unless replaced by -null */
+	const char *json; /* JSON text being decoded; token start/end are offsets into it */
+	jsmntok_t *tok; /* current token; advanced as each value is dumped */
+	int need_subst; /* set when a string token contains a backslash */
+	/* The following are used for -schema */
+	int enable_schema; /* set by the -schema option */
+	Jim_Obj *schemaObj; /* schema list for the container being dumped; stays NULL without -schema */
+	Jim_Obj *schemaTypeObj[JSON_MAX_TYPE]; /* type-name objects, created on first use and shared */
+};
+
+static void json_decode_dump_value(Jim_Interp *interp, struct json_state *state, Jim_Obj *list);
+
+/**
+ * Begins a subschema: installs a fresh, referenced list as state->schemaObj.
+ * Returns the schemaObj that was current before, or NULL if -schema is not enabled.
+ */
+static Jim_Obj *json_decode_schema_push(Jim_Interp *interp, struct json_state *state)
+{
+	Jim_Obj *outer = state->schemaObj;
+	if (!state->enable_schema) {
+		return NULL;
+	}
+	state->schemaObj = Jim_NewListObj(interp, NULL, 0);
+	Jim_IncrRefCount(state->schemaObj);
+	return outer;
+}
+
+/**
+ * Ends the subschema started by json_decode_schema_push(): appends it to
+ * prevSchemaObj, which becomes current again. Does nothing if -schema is not enabled.
+ */
+static void json_decode_schema_pop(Jim_Interp *interp, struct json_state *state, Jim_Obj *prevSchemaObj)
+{
+	if (state->enable_schema) {
+		Jim_Obj *inner = state->schemaObj;
+		state->schemaObj = prevSchemaObj;
+		Jim_ListAppendElement(interp, state->schemaObj, inner);
+		Jim_DecrRefCount(interp, inner);
+	}
+}
+
+/**
+ * Appends the name of schema type 'type' ("bool", "obj", ...) to state->schemaObj.
+ * Name objects are created on first use and kept in state->schemaTypeObj[] for reuse.
+ */
+static void json_decode_add_schema_type(Jim_Interp *interp, struct json_state *state, json_schema_t type)
+{
+	/* Indexed by json_schema_t, so the order must match the enum */
+	static const char * const type_names[] = {
+		"bool", "obj", "list", "mixed", "str", "num",
+	};
+	Jim_Obj **slot;
+
+	assert(type >= 0 && type < JSON_MAX_TYPE);
+	slot = &state->schemaTypeObj[type];
+	if (*slot == NULL) {
+		/* First use of this type: create the object that later appends will share */
+		*slot = Jim_NewStringObj(interp, type_names[type], -1);
+	}
+	Jim_ListAppendElement(interp, state->schemaObj, *slot);
+}
+
+/**
+ * Maps a jsmn token to its schema type.
+ * Objects and strings map directly; arrays always report JSON_MIXED here.
+ * A JSMN_PRIMITIVE is JSON_BOOL if it starts with 't' or 'f', otherwise JSON_NUM.
+ * Any other token type is reported as JSON_STR.
+ */
+static json_schema_t json_decode_get_type(const jsmntok_t *tok, const char *json)
+{
+	if (tok->type == JSMN_OBJECT) {
+		return JSON_OBJ;
+	}
+	if (tok->type == JSMN_ARRAY) {
+		/* Mixed by default - the caller needs other checks to select list instead */
+		return JSON_MIXED;
+	}
+	if (tok->type == JSMN_PRIMITIVE) {
+		char first;
+		assert(json);
+		first = json[tok->start];
+		/* A leading 't' or 'f' is taken to mean true or false */
+		return (first == 't' || first == 'f') ? JSON_BOOL : JSON_NUM;
+	}
+	return JSON_STR;
+}
+
+/**
+ * Returns the container (object or array) at state->tok as a new Tcl list;
+ * an object yields alternating key, value elements. With -schema, the container's
+ * schema is appended to state->schemaObj. state->tok ends up just past the container.
+ */
+static Jim_Obj *
+json_decode_dump_container(Jim_Interp *interp, struct json_state *state)
+{
+	int i;
+	Jim_Obj *list = Jim_NewListObj(interp, NULL, 0);
+	int size = state->tok->size;
+	int type = state->tok->type;
+	json_schema_t container_type = JSON_OBJ; /* JSON_LIST, JSON_MIXED or JSON_OBJ */
+
+	if (state->schemaObj) {
+		json_schema_t list_type = JSON_STR; /* element type of a "list"; replaced below when size > 0 */
+		/* Figure out the type to use for the container */
+		if (type == JSMN_ARRAY) {
+			/* If every element of the array is of the same primitive schema type (str, bool or num),
+			 * we can use "list", otherwise need to use "mixed"
+			 */
+			container_type = JSON_LIST;
+			if (size) {
+				list_type = json_decode_get_type(&state->tok[1], state->json);
+				if (list_type != JSON_BOOL && list_type != JSON_STR && list_type != JSON_NUM) {
+					/* First element is an object or array, so this can't be a "list" */
+					container_type = JSON_MIXED;
+				}
+				/* Strings and primitives are one token each, so until a mismatch element i is tok[i] */
+				for (i = 2; container_type == JSON_LIST && i <= size; i++) {
+					if (json_decode_get_type(state->tok + i, state->json) != list_type) {
+						container_type = JSON_MIXED;
+					}
+				}
+			}
+		}
+		json_decode_add_schema_type(interp, state, container_type);
+		if (container_type == JSON_LIST && size) {
+			json_decode_add_schema_type(interp, state, list_type);
+		}
+	}
+
+	state->tok++;
+
+	for (i = 0; i < size; i++) {
+		if (type == JSMN_OBJECT) {
+			/* Dump the object key */
+			if (state->enable_schema) {
+				const char *p = state->json + state->tok->start;
+				int len = state->tok->end - state->tok->start;
+				Jim_ListAppendElement(interp, state->schemaObj, Jim_NewStringObj(interp, p, len));
+			}
+			json_decode_dump_value(interp, state, list);
+		}
+
+		if (state->schemaObj && container_type != JSON_LIST) {
+			if (state->tok->type == JSMN_STRING || state->tok->type == JSMN_PRIMITIVE) {
+				json_decode_add_schema_type(interp, state, json_decode_get_type(state->tok, state->json));
+			}
+		}
+
+		/* Dump the array or object value */
+		json_decode_dump_value(interp, state, list);
+	}
+
+	return list;
+}
+
+/**
+ * Appends the value at state->tok to 'list' and advances state->tok past it.
+ *
+ * Strings and primitives occupy one token. Objects and arrays are dumped via
+ * json_decode_dump_container(), with their schema (if enabled) kept in a subschema.
+ */
+static void
+json_decode_dump_value(Jim_Interp *interp, struct json_state *state, Jim_Obj *list)
+{
+	const jsmntok_t *t = state->tok;
+	const char *text;
+	int textlen;
+	Jim_Obj *valueObj;
+
+	if (t->type != JSMN_STRING && t->type != JSMN_PRIMITIVE) {
+		/* Object or array: becomes a sublist, described by its own subschema */
+		Jim_Obj *outerSchemaObj = json_decode_schema_push(interp, state);
+		Jim_ListAppendElement(interp, list, json_decode_dump_container(interp, state));
+		json_decode_schema_pop(interp, state, outerSchemaObj);
+		return;
+	}
+	text = state->json + t->start;
+	textlen = t->end - t->start;
+	if (t->type == JSMN_STRING) {
+		/* Note whether backslash escapes will need processing afterwards */
+		if (state->need_subst == 0 && memchr(text, '\\', textlen) != NULL) {
+			state->need_subst = 1;
+		}
+		valueObj = Jim_NewStringObj(interp, text, textlen);
+	} else if (text[0] == 'n') {	/* null */
+		valueObj = state->nullObj;
+	} else if (text[0] == 'I') {
+		valueObj = Jim_NewStringObj(interp, "Inf", -1);
+	} else if (text[0] == '-' && text[1] == 'I') {
+		valueObj = Jim_NewStringObj(interp, "-Inf", -1);
+	} else {		/* number, true or false */
+		valueObj = Jim_NewStringObj(interp, text, textlen);
+	}
+	Jim_ListAppendElement(interp, list, valueObj);
+	state->tok++;
+}
+
+/**
+ * Parses the leading options ?-null nullvalue? ?-schema? of argv into *state.
+ * Fields belonging to options that are not present are left unchanged.
+ *
+ * Returns JIM_OK, or JIM_ERR with an error result set.
+ */
+static int parse_json_decode_options(Jim_Interp *interp, int argc, Jim_Obj *const argv[], struct json_state *state)
+{
+	static const char * const options[] = { "-null", "-schema", NULL };
+	enum { OPT_NULL, OPT_SCHEMA, };
+	const int last = argc - 1;
+	int i = 1;
+
+	while (i < last) {
+		int option;
+		if (Jim_GetEnum(interp, argv[i], options, &option, NULL, JIM_ERRMSG | JIM_ENUM_ABBREV) != JIM_OK) {
+			return JIM_ERR;
+		}
+		i++;
+		if (option == OPT_SCHEMA) {
+			state->enable_schema = 1;
+		} else if (option == OPT_NULL) {
+			/* The word after -null replaces the current null value */
+			Jim_Obj *nullObj = argv[i++];
+			Jim_IncrRefCount(nullObj);
+			Jim_DecrRefCount(interp, state->nullObj);
+			state->nullObj = nullObj;
+		}
+	}
+
+	if (i != last) {
+		/* Either -null consumed the final word, or there was no JSON word at all */
+		Jim_WrongNumArgs(interp, 1, argv,
+			"?-null nullvalue? ?-schema? json");
+		return JIM_ERR;
+	}
+	return JIM_OK;
+}
+
+/**
+ * Tokenises the JSON text 'json' (of 'len' bytes) with jsmn.
+ *
+ * The text is parsed twice: first without a token array, just to find the
+ * number of tokens, then again into an array of exactly that size.
+ * The root token must be an object or an array.
+ *
+ * Returns the allocated token array (release it with Jim_Free()), or NULL on error.
+ * An error result is set for invalid, truncated or non-container input.
+ */
+static jsmntok_t *
+json_decode_tokenize(Jim_Interp *interp, const char *json, size_t len)
+{
+	jsmn_parser parser;
+	jsmntok_t *tokens;
+	const char *msg = NULL;
+	int count;
+
+	/* Counting pass */
+	jsmn_init(&parser);
+	count = jsmn_parse(&parser, json, len, NULL, 0);
+	if (count <= 0) {
+		goto fail;
+	}
+
+	/* Real pass, into an array sized by the counting pass */
+	tokens = Jim_Alloc(count * sizeof(*tokens));
+	jsmn_init(&parser);
+	count = jsmn_parse(&parser, json, len, tokens, count);
+	if (tokens->type != JSMN_OBJECT && tokens->type != JSMN_ARRAY) {
+		/* Reported below as a bad root element */
+		count = 0;
+	}
+	if (count > 0) {
+		return tokens;
+	}
+	Jim_Free(tokens);
+
+fail:
+	/* Any other negative code returns NULL without setting a message */
+	if (count == JSMN_ERROR_INVAL) {
+		msg = "invalid JSON string";
+	} else if (count == JSMN_ERROR_PART) {
+		msg = "truncated JSON string";
+	} else if (count == 0) {
+		msg = "root element must be an object or an array";
+	}
+	if (msg) {
+		Jim_SetResultString(interp, msg, -1);
+	}
+	return NULL;
+}
+
+/**
+ * Implements: json::decode ?-null nullvalue? ?-schema? json
+ * Sets the result to the decoded data structure, or to the list {data schema}
+ * if -schema is specified. Returns JIM_OK, or JIM_ERR on bad options or JSON.
+ */
+static int
+json_decode(Jim_Interp *interp, int argc, Jim_Obj *const argv[])
+{
+	Jim_Obj *list;
+	jsmntok_t *tokens;
+	int len;
+	int ret = JIM_ERR;
+	struct json_state state;
+
+	memset(&state, 0, sizeof(state)); /* also leaves schemaObj NULL and -schema disabled */
+
+	state.nullObj = Jim_NewStringObj(interp, "null", -1); /* default null value; -null replaces it */
+	Jim_IncrRefCount(state.nullObj);
+
+	if (parse_json_decode_options(interp, argc, argv, &state) != JIM_OK) {
+		goto done;
+	}
+
+	state.json = Jim_GetString(argv[argc - 1], &len); /* the last word is the JSON text */
+
+	if (!len) {
+		Jim_SetResultString(interp, "empty JSON string", -1);
+		goto done;
+	}
+	if ((tokens = json_decode_tokenize(interp, state.json, len)) == NULL) {
+		goto done;
+	}
+	state.tok = tokens;
+	json_decode_schema_push(interp, &state); /* creates the top-level schema list; no-op without -schema */
+
+	list = json_decode_dump_container(interp, &state);
+	Jim_Free(tokens); /* the tokens are not used again after the dump */
+	ret = JIM_OK;
+
+	/* Make sure the refcount doesn't go to 0 during Jim_SubstObj() */
+	Jim_IncrRefCount(list);
+
+	if (state.need_subst) {
+		/* Substitute backslash escapes in the returned dictionary.
+		 * Need to be careful of refcounts.
+		 * Note that Jim_SubstObj() supports a few more escapes than
+		 * JSON requires, but should give the same result for all legal escapes.
+		 */
+		Jim_Obj *newList;
+		Jim_SubstObj(interp, list, &newList, JIM_SUBST_FLAG | JIM_SUBST_NOCMD | JIM_SUBST_NOVAR);
+		Jim_IncrRefCount(newList);
+		Jim_DecrRefCount(interp, list);
+		list = newList;
+	}
+
+	if (state.schemaObj) {
+		Jim_Obj *resultObj = Jim_NewListObj(interp, NULL, 0);
+		Jim_ListAppendElement(interp, resultObj, list);
+		Jim_ListAppendElement(interp, resultObj, state.schemaObj);
+		Jim_SetResult(interp, resultObj);
+		Jim_DecrRefCount(interp, state.schemaObj); /* drop the reference taken by json_decode_schema_push() */
+	}
+	else {
+		Jim_SetResult(interp, list);
+	}
+	Jim_DecrRefCount(interp, list); /* release this function's own reference to the data */
+
+done:
+	Jim_DecrRefCount(interp, state.nullObj); /* the default "null" object, or the -null value that replaced it */
+
+	return ret;
+}
+
+/* Package entry point: provides "json" 1.0 and registers the json::decode command */
+int Jim_jsonInit(Jim_Interp *interp)
+{
+	int rc = Jim_PackageProvide(interp, "json", "1.0", JIM_ERRMSG);
+	if (rc == JIM_OK) {
+		Jim_CreateCommand(interp, "json::decode", json_decode, NULL, NULL);
+		/* Load the Tcl implementation of the json encoder if possible; the outcome is ignored */
+		Jim_PackageRequire(interp, "jsonencode", 0);
+		return JIM_OK;
+	}
+	return JIM_ERR;
+}
author	Steve Bennett <steveb@workware.net.au>	2019-11-04 08:41:32 +1000
committer	Steve Bennett <steveb@workware.net.au>	2019-11-09 19:59:15 +1000
commit	dd064e670daf910fa50e138ec0c36822405b60f5 (patch)
tree	c512150b006c3e1dbc7f4575e2d744133caa0b59 /jim-json.c
parent	529c84b4ee31f51925a9ac14247a94a428592c7d (diff)
download	jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.zip jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.tar.gz jimtcl-dd064e670daf910fa50e138ec0c36822405b60f5.tar.bz2