about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Petri Lehtinen <petri@digip.org> 2009-12-05 22:55:30 +0200
committer Petri Lehtinen <petri@digip.org> 2009-12-05 22:55:30 +0200
commit 50031440a3b7ab2623e9468bd20e837250250cd9 (patch)
tree 063ac18e20b93b3a96d786ea8e6bd761a2fb978d /src
parent d67aeb9739bf3e963ceaa8b622d20cd87a0b65fe (diff)
download jansson-50031440a3b7ab2623e9468bd20e837250250cd9.zip
jansson-50031440a3b7ab2623e9468bd20e837250250cd9.tar.gz
jansson-50031440a3b7ab2623e9468bd20e837250250cd9.tar.bz2
Implement JSON_ENSURE_ASCII encoding flag
With this flag, all Unicode characters outside the ASCII range are escaped.
Diffstat (limited to 'src')
-rw-r--r-- src/dump.c 67
-rw-r--r-- src/jansson.h 5
-rw-r--r-- src/load.c 2
-rw-r--r-- src/utf.c 33
-rw-r--r-- src/utf.h 3
5 files changed, 88 insertions, 22 deletions
diff --git a/src/dump.c b/src/dump.c
index 8d2a82b..dc3fcbc 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -14,6 +14,7 @@
#include <jansson.h>
#include "jansson_private.h"
#include "strbuffer.h"
+#include "utf.h"
#define MAX_INTEGER_STR_LENGTH 100
#define MAX_REAL_STR_LENGTH 100
@@ -65,34 +66,49 @@ static int dump_indent(unsigned long flags, int depth, int space, dump_func dump
return 0;
}
-static int dump_string(const char *str, dump_func dump, void *data)
+static int dump_string(const char *str, int ascii, dump_func dump, void *data)
{
- const char *end;
+ const char *pos, *end;
+ int32_t codepoint;
if(dump("\"", 1, data))
return -1;
- end = str;
+ end = pos = str;
while(1)
{
const char *text;
- char seq[7];
+ char seq[13];
int length;
- while(*end && *end != '\\' && *end != '"' && (unsigned char)*end > 0x1F)
- end++;
+ while(*end)
+ {
+ end = utf8_iterate(pos, &codepoint);
+ if(!end)
+ return -1;
- if(end != str) {
- if(dump(str, end - str, data))
+ /* mandatory escape or control char */
+ if(codepoint == '\\' || codepoint == '"' || codepoint < 0x20)
+ break;
+
+ /* non-ASCII */
+ if(ascii && codepoint > 0x7F)
+ break;
+
+ pos = end;
+ }
+
+ if(pos != str) {
+ if(dump(str, pos - str, data))
return -1;
}
- if(!*end)
+ if(end == pos)
break;
/* handle \, ", and control codes */
length = 2;
- switch(*end)
+ switch(codepoint)
{
case '\\': text = "\\\\"; break;
case '\"': text = "\\\""; break;
@@ -103,9 +119,27 @@ static int dump_string(const char *str, dump_func dump, void *data)
case '\t': text = "\\t"; break;
default:
{
- sprintf(seq, "\\u00%02x", *end);
+ /* codepoint is in BMP */
+ if(codepoint < 0x10000)
+ {
+ sprintf(seq, "\\u%04x", codepoint);
+ length = 6;
+ }
+
+ /* not in BMP -> construct a UTF-16 surrogate pair */
+ else
+ {
+ int32_t first, last;
+
+ codepoint -= 0x10000;
+ first = 0xD800 | ((codepoint & 0xffc00) >> 10);
+ last = 0xDC00 | (codepoint & 0x003ff);
+
+ sprintf(seq, "\\u%04x\\u%04x", first, last);
+ length = 12;
+ }
+
text = seq;
- length = 6;
break;
}
}
@@ -113,8 +147,7 @@ static int dump_string(const char *str, dump_func dump, void *data)
if(dump(text, length, data))
return -1;
- end++;
- str = end;
+ str = pos = end;
}
return dump("\"", 1, data);
@@ -123,6 +156,8 @@ static int dump_string(const char *str, dump_func dump, void *data)
static int do_dump(const json_t *json, unsigned long flags, int depth,
dump_func dump, void *data)
{
+ int ascii = flags & JSON_ENSURE_ASCII ? 1 : 0;
+
switch(json_typeof(json)) {
case JSON_NULL:
return dump("null", 4, data);
@@ -158,7 +193,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
}
case JSON_STRING:
- return dump_string(json_string_value(json), dump, data);
+ return dump_string(json_string_value(json), ascii, dump, data);
case JSON_ARRAY:
{
@@ -238,7 +273,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
{
void *next = json_object_iter_next((json_t *)json, iter);
- dump_string(json_object_iter_key(iter), dump, data);
+ dump_string(json_object_iter_key(iter), ascii, dump, data);
if(dump(separator, separator_length, data) ||
do_dump(json_object_iter_value(iter), flags, depth + 1,
dump, data))
diff --git a/src/jansson.h b/src/jansson.h
index c8a5a90..d59fe10 100644
--- a/src/jansson.h
+++ b/src/jansson.h
@@ -141,8 +141,9 @@ json_t *json_loads(const char *input, json_error_t *error);
json_t *json_loadf(FILE *input, json_error_t *error);
json_t *json_load_file(const char *path, json_error_t *error);
-#define JSON_INDENT(n) (n & 0xFF)
-#define JSON_COMPACT 0x100
+#define JSON_INDENT(n) (n & 0xFF)
+#define JSON_COMPACT 0x100
+#define JSON_ENSURE_ASCII 0x200
char *json_dumps(const json_t *json, unsigned long flags);
int json_dumpf(const json_t *json, FILE *output, unsigned long flags);
diff --git a/src/load.c b/src/load.c
index 32d6500..278f35e 100644
--- a/src/load.c
+++ b/src/load.c
@@ -149,7 +149,7 @@ static char stream_get(stream_t *stream, json_error_t *error)
for(i = 1; i < count; i++)
stream->buffer[i] = stream->get(stream->data);
- if(!utf8_check_full(stream->buffer, count))
+ if(!utf8_check_full(stream->buffer, count, NULL))
goto out;
stream->stream_pos += count;
diff --git a/src/utf.c b/src/utf.c
index 2efcb68..dda80f0 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -80,7 +80,7 @@ int utf8_check_first(char byte)
}
}
-int utf8_check_full(const char *buffer, int size)
+int utf8_check_full(const char *buffer, int size, int32_t *codepoint)
{
int i;
int32_t value = 0;
@@ -130,9 +130,38 @@ int utf8_check_full(const char *buffer, int size)
return 0;
}
+ if(codepoint)
+ *codepoint = value;
+
return 1;
}
+const char *utf8_iterate(const char *buffer, int32_t *codepoint) /* Decode the UTF-8 sequence at the start of buffer; returns a pointer just past it, buffer itself when it points at the terminating NUL, or NULL when the sequence is rejected. */
+{
+ int count; /* number of bytes in the sequence, as reported by utf8_check_first() */
+ int32_t value; /* decoded code point */
+
+ if(!*buffer) /* end of string: nothing is consumed and *codepoint is left untouched */
+ return buffer;
+
+ count = utf8_check_first(buffer[0]);
+ if(count <= 0) /* presumably an invalid lead byte -- confirm against utf8_check_first() */
+ return NULL;
+
+ if(count == 1) /* single-byte sequence: the byte value itself is the code point */
+ value = (unsigned char)buffer[0];
+ else
+ {
+ if(!utf8_check_full(buffer, count, &value)) /* checks the multi-byte sequence and stores its code point in value */
+ return NULL;
+ }
+
+ if(codepoint) /* codepoint is optional; pass NULL to advance without receiving the value */
+ *codepoint = value;
+
+ return buffer + count;
+}
+
int utf8_check_string(const char *string, int length)
{
int i;
@@ -150,7 +179,7 @@ int utf8_check_string(const char *string, int length)
if(i + count > length)
return 0;
- if(!utf8_check_full(&string[i], count))
+ if(!utf8_check_full(&string[i], count, NULL))
return 0;
i += count - 1;
diff --git a/src/utf.h b/src/utf.h
index 75d7b6e..03fba69 100644
--- a/src/utf.h
+++ b/src/utf.h
@@ -11,7 +11,8 @@
int utf8_encode(int codepoint, char *buffer, int *size);
int utf8_check_first(char byte);
-int utf8_check_full(const char *buffer, int size);
+int utf8_check_full(const char *buffer, int size, int32_t *codepoint);
+const char *utf8_iterate(const char *buffer, int32_t *codepoint);
int utf8_check_string(const char *string, int length);