5 files changed, 205 insertions, 52 deletions
diff --git a/src/dump.c b/src/dump.c
index bd12a7b..ba70f8d 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -9,10 +9,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdint.h>
+#include <assert.h>
 
 #include <jansson.h>
 #include "jansson_private.h"
 #include "strbuffer.h"
+#include "utf.h"
 
 #define MAX_INTEGER_STR_LENGTH  100
 #define MAX_REAL_STR_LENGTH     100
@@ -42,7 +45,7 @@ static int dump_to_file(const char *buffer, int size, void *data)
 /* 256 spaces (the maximum indentation size) */
 static char whitespace[] = "                                                                                                                                                                                                                                                                ";
 
-static int dump_indent(unsigned long flags, int depth, dump_func dump, void *data)
+static int dump_indent(unsigned long flags, int depth, int space, dump_func dump, void *data)
 {
     if(JSON_INDENT(flags) > 0)
     {
@@ -57,37 +60,56 @@ static int dump_indent(unsigned long flags, int depth, dump_func dump, void *dat
                 return -1;
         }
     }
+    else if(space && !(flags & JSON_COMPACT))
+    {
+        return dump(" ", 1, data);
+    }
     return 0;
 }
 
-static int dump_string(const char *str, dump_func dump, void *data)
+static int dump_string(const char *str, int ascii, dump_func dump, void *data)
 {
-    const char *end;
+    const char *pos, *end;
+    int32_t codepoint;
 
     if(dump("\"", 1, data))
         return -1;
 
-    end = str;
+    end = pos = str;
     while(1)
     {
         const char *text;
-        char seq[7];
+        char seq[13];
         int length;
 
-        while(*end && *end != '\\' && *end != '"' && (unsigned char)*end > 0x1F)
-            end++;
+        while(*end)
+        {
+            end = utf8_iterate(pos, &codepoint);
+            if(!end)
+                return -1;
+
+            /* mandatory escape or control char */
+            if(codepoint == '\\' || codepoint == '"' || codepoint < 0x20)
+                break;
+
+            /* non-ASCII */
+            if(ascii && codepoint > 0x7F)
+                break;
+
+            pos = end;
+        }
 
-        if(end != str) {
-            if(dump(str, end - str, data))
+        if(pos != str) {
+            if(dump(str, pos - str, data))
                 return -1;
         }
 
-        if(!*end)
+        if(end == pos)
             break;
 
         /* handle \, ", and control codes */
         length = 2;
-        switch(*end)
+        switch(codepoint)
         {
             case '\\': text = "\\\\"; break;
             case '\"': text = "\\\""; break;
@@ -98,9 +120,27 @@ static int dump_string(const char *str, dump_func dump, void *data)
             case '\t': text = "\\t"; break;
             default:
             {
-                sprintf(seq, "\\u00%02x", *end);
+                /* codepoint is in BMP */
+                if(codepoint < 0x10000)
+                {
+                    sprintf(seq, "\\u%04x", codepoint);
+                    length = 6;
+                }
+
+                /* not in BMP -> construct a UTF-16 surrogate pair */
+                else
+                {
+                    int32_t first, last;
+
+                    codepoint -= 0x10000;
+                    first = 0xD800 | ((codepoint & 0xffc00) >> 10);
+                    last = 0xDC00 | (codepoint & 0x003ff);
+
+                    sprintf(seq, "\\u%04x\\u%04x", first, last);
+                    length = 12;
+                }
+
                 text = seq;
-                length = 6;
                 break;
             }
         }
@@ -108,16 +148,22 @@ static int dump_string(const char *str, dump_func dump, void *data)
         if(dump(text, length, data))
             return -1;
 
-        end++;
-        str = end;
+        str = pos = end;
     }
 
     return dump("\"", 1, data);
 }
 
+static int object_key_cmp(const void *key1, const void *key2)
+{
+    return strcmp(*(const char **)key1, *(const char **)key2);
+}
+
 static int do_dump(const json_t *json, unsigned long flags, int depth,
                    dump_func dump, void *data)
 {
+    int ascii = flags & JSON_ENSURE_ASCII ? 1 : 0;
+
     switch(json_typeof(json)) {
         case JSON_NULL:
             return dump("null", 4, data);
@@ -168,7 +214,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
         }
 
         case JSON_STRING:
-            return dump_string(json_string_value(json), dump, data);
+            return dump_string(json_string_value(json), ascii, dump, data);
 
         case JSON_ARRAY:
         {
@@ -188,7 +234,7 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
                 return -1;
             if(n == 0)
                 return dump("]", 1, data);
-            if(dump_indent(flags, depth + 1, dump, data))
+            if(dump_indent(flags, depth + 1, 0, dump, data))
                 return -1;
 
             for(i = 0; i < n; ++i) {
@@ -199,12 +245,12 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
                 if(i < n - 1)
                 {
                     if(dump(",", 1, data) ||
-                       dump_indent(flags, depth + 1, dump, data))
+                       dump_indent(flags, depth + 1, 1, dump, data))
                         return -1;
                 }
                 else
                 {
-                    if(dump_indent(flags, depth, dump, data))
+                    if(dump_indent(flags, depth, 0, dump, data))
                         return -1;
                 }
             }
@@ -217,6 +263,17 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
         {
             json_object_t *object;
             void *iter;
+            const char *separator;
+            int separator_length;
+
+            if(flags & JSON_COMPACT) {
+                separator = ":";
+                separator_length = 1;
+            }
+            else {
+                separator = ": ";
+                separator_length = 2;
+            }
 
             /* detect circular references */
             object = json_to_object(json);
@@ -230,32 +287,99 @@ static int do_dump(const json_t *json, unsigned long flags, int depth,
                 return -1;
             if(!iter)
                 return dump("}", 1, data);
-            if(dump_indent(flags, depth + 1, dump, data))
+            if(dump_indent(flags, depth + 1, 0, dump, data))
                 return -1;
 
-            while(iter)
+            if(flags & JSON_SORT_KEYS)
             {
-                void *next = json_object_iter_next((json_t *)json, iter);
+                /* Sort keys */
 
-                dump_string(json_object_iter_key(iter), dump, data);
-                if(dump(": ", 2, data) ||
-                   do_dump(json_object_iter_value(iter), flags, depth + 1,
-                           dump, data))
+                const char **keys;
+                unsigned int size;
+                unsigned int i;
+
+                size = json_object_size(json);
+                keys = malloc(size * sizeof(const char *));
+                if(!keys)
                     return -1;
 
-                if(next)
+                i = 0;
+                while(iter)
                 {
-                    if(dump(",", 1, data) ||
-                       dump_indent(flags, depth + 1, dump, data))
-                        return -1;
+                    keys[i] = json_object_iter_key(iter);
+                    iter = json_object_iter_next((json_t *)json, iter);
+                    i++;
                 }
-                else
+                assert(i == size);
+
+                qsort(keys, size, sizeof(const char *), object_key_cmp);
+
+                for(i = 0; i < size; i++)
                 {
-                    if(dump_indent(flags, depth, dump, data))
+                    const char *key;
+                    json_t *value;
+
+                    key = keys[i];
+                    value = json_object_get(json, key);
+                    assert(value);
+
+                    dump_string(key, ascii, dump, data);
+                    if(dump(separator, separator_length, data) ||
+                       do_dump(value, flags, depth + 1, dump, data))
+                    {
+                        free(keys);
                         return -1;
+                    }
+
+                    if(i < size - 1)
+                    {
+                        if(dump(",", 1, data) ||
+                           dump_indent(flags, depth + 1, 1, dump, data))
+                        {
+                            free(keys);
+                            return -1;
+                        }
+                    }
+                    else
+                    {
+                        if(dump_indent(flags, depth, 0, dump, data))
+                        {
+                            free(keys);
+                            return -1;
+                        }
+                    }
                 }
 
-                iter = next;
+                free(keys);
+            }
+            else
+            {
+                /* Don't sort keys */
+
+                while(iter)
+                {
+                    void *next = json_object_iter_next((json_t *)json, iter);
+
+                    dump_string(json_object_iter_key(iter), ascii, dump, data);
+                    if(dump(separator, separator_length, data) ||
+                       do_dump(json_object_iter_value(iter), flags, depth + 1,
+                               dump, data))
+                        return -1;
+
+                    if(next)
+                    {
+                        if(dump(",", 1, data) ||
+                           dump_indent(flags, depth + 1, 1, dump, data))
+                            return -1;
+                    }
+                    else
+                    {
+                        if(dump_indent(flags, depth, 0, dump, data))
+                            return -1;
+                    }
+
+                    iter = next;
+                }
             }
 
             object->visited = 0;
@@ -285,11 +409,6 @@ char *json_dumps(const json_t *json, unsigned long flags)
         return NULL;
     }
 
-    if(dump_to_strbuffer("\n", 1, (void *)&strbuff)) {
-        strbuffer_close(&strbuff);
-        return NULL;
-    }
-
     result = strdup(strbuffer_value(&strbuff));
     strbuffer_close(&strbuff);
 
@@ -301,9 +420,7 @@ int json_dumpf(const json_t *json, FILE *output, unsigned long flags)
     if(!json_is_array(json) && !json_is_object(json))
         return -1;
 
-    if(do_dump(json, flags, 0, dump_to_file, (void *)output))
-        return -1;
-    return dump_to_file("\n", 1, (void *)output);
+    return do_dump(json, flags, 0, dump_to_file, (void *)output);
 }
 
 int json_dump_file(const json_t *json, const char *path, unsigned long flags)
diff --git a/src/jansson.h b/src/jansson.h
index ac6736f..607dfb5 100644
--- a/src/jansson.h
+++ b/src/jansson.h
@@ -141,7 +141,10 @@ json_t *json_loads(const char *input, json_error_t *error);
 json_t *json_loadf(FILE *input, json_error_t *error);
 json_t *json_load_file(const char *path, json_error_t *error);
 
-#define JSON_INDENT(n)   (n & 0xFF)
+#define JSON_INDENT(n)      (n & 0xFF)
+#define JSON_COMPACT        0x100
+#define JSON_ENSURE_ASCII   0x200
+#define JSON_SORT_KEYS      0x400
 
 char *json_dumps(const json_t *json, unsigned long flags);
 int json_dumpf(const json_t *json, FILE *output, unsigned long flags);
diff --git a/src/load.c b/src/load.c
index 005e03c..4d08139 100644
--- a/src/load.c
+++ b/src/load.c
@@ -14,6 +14,7 @@
 #include <string.h>
 #include <stdarg.h>
 #include <assert.h>
+#include <stdint.h>
 
 #include <jansson.h>
 #include "jansson_private.h"
@@ -148,7 +149,7 @@ static char stream_get(stream_t *stream, json_error_t *error)
             for(i = 1; i < count; i++)
                 stream->buffer[i] = stream->get(stream->data);
 
-            if(!utf8_check_full(stream->buffer, count))
+            if(!utf8_check_full(stream->buffer, count, NULL))
                 goto out;
 
             stream->stream_pos += count;
@@ -221,10 +222,10 @@ static void lex_save_cached(lex_t *lex)
 }
 
 /* assumes that str points to 'u' plus at least 4 valid hex digits */
-static int decode_unicode_escape(const char *str)
+static int32_t decode_unicode_escape(const char *str)
 {
     int i;
-    int value = 0;
+    int32_t value = 0;
 
     assert(str[0] == 'u');
 
@@ -325,7 +326,7 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
             if(*p == 'u') {
                 char buffer[4];
                 int length;
-                int value;
+                int32_t value;
 
                 value = decode_unicode_escape(p);
                 p += 5;
@@ -333,7 +334,7 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
                 if(0xD800 <= value && value <= 0xDBFF) {
                     /* surrogate pair */
                     if(*p == '\\' && *(p + 1) == 'u') {
-                        int value2 = decode_unicode_escape(++p);
+                        int32_t value2 = decode_unicode_escape(++p);
                         p += 5;
 
                         if(0xDC00 <= value2 && value2 <= 0xDFFF) {
diff --git a/src/utf.c b/src/utf.c
index cf2e8e4..dda80f0 100644
--- a/src/utf.c
+++ b/src/utf.c
@@ -6,8 +6,9 @@
  */
 
 #include <string.h>
+#include <stdint.h>
 
-int utf8_encode(int codepoint, char *buffer, int *size)
+int utf8_encode(int32_t codepoint, char *buffer, int *size)
 {
     if(codepoint < 0)
         return -1;
@@ -79,9 +80,10 @@ int utf8_check_first(char byte)
     }
 }
 
-int utf8_check_full(const char *buffer, int size)
+int utf8_check_full(const char *buffer, int size, int32_t *codepoint)
 {
-    int i, value = 0;
+    int i;
+    int32_t value = 0;
     unsigned char u = (unsigned char)buffer[0];
 
     if(size == 2)
@@ -128,9 +130,38 @@ int utf8_check_full(const char *buffer, int size)
         return 0;
     }
 
+    if(codepoint)
+        *codepoint = value;
+
     return 1;
 }
 
+const char *utf8_iterate(const char *buffer, int32_t *codepoint)
+{
+    int count;
+    int32_t value;
+
+    if(!*buffer)
+        return buffer;
+
+    count = utf8_check_first(buffer[0]);
+    if(count <= 0)
+        return NULL;
+
+    if(count == 1)
+        value = (unsigned char)buffer[0];
+    else
+    {
+        if(!utf8_check_full(buffer, count, &value))
+            return NULL;
+    }
+
+    if(codepoint)
+        *codepoint = value;
+
+    return buffer + count;
+}
+
 int utf8_check_string(const char *string, int length)
 {
     int i;
@@ -148,7 +179,7 @@ int utf8_check_string(const char *string, int length)
             if(i + count > length)
                 return 0;
 
-            if(!utf8_check_full(&string[i], count))
+            if(!utf8_check_full(&string[i], count, NULL))
                 return 0;
 
             i += count - 1;
diff --git a/src/utf.h b/src/utf.h
index 75d7b6e..03fba69 100644
--- a/src/utf.h
+++ b/src/utf.h
@@ -11,7 +11,8 @@
 int utf8_encode(int codepoint, char *buffer, int *size);
 
 int utf8_check_first(char byte);
-int utf8_check_full(const char *buffer, int size);
+int utf8_check_full(const char *buffer, int size, int32_t *codepoint);
+const char *utf8_iterate(const char *buffer, int32_t *codepoint);
 
 int utf8_check_string(const char *string, int length);