core: dicts (and arrays) now preserve insertion order

Although the documentation has always stated that, like Tcl, insertion order of dictionaries was preserved, this has never been the case. Instead, dictionaries were implemented as simple hash tables that did not preserve order. Now, a new implementation of dictionaries preserves insertion order and has a number of other benefits. Instead of hashing keys and storing keys and values in the hash table, the keys and values are not stored in a separate table, exactly as lists are stored, with alternating key, value pairs. Iterating over the dictionary is exactly like iterating over a list, where the order is consistent. The hash table uses closed hashing rather than open hashing to avoid allocatation of hash entry structures. Instead a fixed (but expandable) hash table maps the key hash to the offset in the key/value table. This use of offsets means that if the key/value table grows, the offsets remain valid. Likewise, if the hash table needs to grow, the key, value table remains unchanged. In addition to the offset (which indexes to the value, and 0 means the hash table entry is unused), the original hash is stored in the hash table. This reduces the need for object comparisons on hash entry collisions. The collision resolution algorithm is the same as that used by Python: peturb >>= 5; idx = (5 * idx + 1 + peturb) & dict->sizemask; In order to reduce collisions, the hash table is expanded once it reaches half full. This is more conservative that Python where the table is expanded when it is two thirds full. Note that if a hash collision occurs and then the original entry that cased the hash collision is removed, we still need to continue iterating when searching for the new key. Don't stop at the now-empty slot. So these entries are marked with offset=-1 to indicate that they need to be skipped. In addition, the new faster hashing algorithm from Tcl 8.7 is used. This the hash for integers to be calculated efficiently without requiring them to be converted to string form first. This implementation is modelled largely on the Python dict implementation. Overall the performance should be an improvement over the current implementation, whilst preserving order. Dictionary creating and insertion should be faster as hash entries do not need to be allocated and resizing should be slightly faster. Entry lookup should be about the same, except may be faster for pure integer keys. Below are some indicative benchmarks. OLD NEW dict-create-1.1 Create empty dict 97.2ns . dict-create-1.2 Create small dict 440ns -27% dict-create-1.3 Create medium dict 1.54us -57% dict-create-1.4 Create large dict (int keys) 130us -80% dict-create-1.5 Create large dict (string keys) 143us -75% dict-set-1.1 Replace existing item 258ns -34% dict-set-1.2 Replace nonexistent item 365ns -49% dict-exists-1.1 Find existing item 55.7ns -5% dict-exists-1.2 Find nonexistent item 55.0ns -5% Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2020-06-06 15:06:34 +1000
committer: Steve Bennett <steveb@workware.net.au> 2020-06-25 09:01:02 +1000
commit: 99b7ce979f72087be74b039aa988a4f25f3b6d27 (patch)
tree: 6b23f609b5309e3eea97b49e0eaf729f423a1766 /jim.h
parent: 015569f090acda5702eafe6487b534fa76d354d5 (diff)
download: jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.zip
jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.tar.gz
jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.tar.bz2
1 files changed, 23 insertions, 3 deletions
diff --git a/jim.h b/jim.h
index ad66b16..a1e4c46 100644
--- a/jim.h
+++ b/jim.h
@@ -236,6 +236,8 @@ typedef struct Jim_HashTableIterator {
         (entry)->u.val = (_val_); \
 } while(0)
 
+#define Jim_SetHashIntVal(ht, entry, _val_) (entry)->u.intval = (_val_)
+
 #define Jim_FreeEntryKey(ht, entry) \
     if ((ht)->type->keyDestructor) \
         (ht)->type->keyDestructor((ht)->privdata, (entry)->key)
@@ -256,6 +258,7 @@ typedef struct Jim_HashTableIterator {
 
 #define Jim_GetHashEntryKey(he) ((he)->key)
 #define Jim_GetHashEntryVal(he) ((he)->u.val)
+#define Jim_GetHashEntryIntVal(he) ((he)->u.intval)
 #define Jim_GetHashTableCollisions(ht) ((ht)->collisions)
 #define Jim_GetHashTableSize(ht) ((ht)->size)
 #define Jim_GetHashTableUsed(ht) ((ht)->used)
@@ -318,6 +321,8 @@ typedef struct Jim_Obj {
             int len;        /* Length */
             int maxLen;        /* Allocated 'ele' length */
         } listValue;
+        /* dict object */
+        struct Jim_Dict *dictValue;
         /* String type */
         struct {
             int maxLength;
@@ -455,7 +460,22 @@ typedef int Jim_CmdProc(struct Jim_Interp *interp, int argc,
     Jim_Obj *const *argv);
 typedef void Jim_DelCmdProc(struct Jim_Interp *interp, void *privData);
 
-
+/* The dict structure. It uses the same approach as Python OrderedDict
+ * of storing a hash table of table offsets into a table containing keys and objects.
+ * This preserves order when adding and replacing elements.
+ */
+typedef struct Jim_Dict {
+    struct JimDictHashEntry {
+        int offset;
+        unsigned hash;
+    } *ht;		        /* Allocated hash table of size 'size' */
+    unsigned int size;          /* Size of the hash table (0 or power of two) */
+    unsigned int sizemask;      /* mask to apply to hash to index into offsets table */
+    unsigned int uniq;          /* unique value to add to hash generator */
+    Jim_Obj **table;            /* Table of alternating key, value elements */
+    int len;                    /* Number of used elements in table */
+    int maxLen;                 /* Allocated length of table */
+} Jim_Dict;
 
 /* A command is implemented in C if isproc is 0, otherwise
  * it is a Tcl procedure with the arglist and body represented by the
@@ -799,8 +819,8 @@ JIM_EXPORT int Jim_DictKeysVector (Jim_Interp *interp,
 JIM_EXPORT int Jim_SetDictKeysVector (Jim_Interp *interp,
         Jim_Obj *varNamePtr, Jim_Obj *const *keyv, int keyc,
         Jim_Obj *newObjPtr, int flags);
-JIM_EXPORT int Jim_DictPairs(Jim_Interp *interp,
-        Jim_Obj *dictPtr, Jim_Obj ***objPtrPtr, int *len);
+JIM_EXPORT Jim_Obj **Jim_DictPairs(Jim_Interp *interp,
+        Jim_Obj *dictPtr, int *len);
 JIM_EXPORT int Jim_DictAddElement(Jim_Interp *interp, Jim_Obj *objPtr,
         Jim_Obj *keyObjPtr, Jim_Obj *valueObjPtr);
author	Steve Bennett <steveb@workware.net.au>	2020-06-06 15:06:34 +1000
committer	Steve Bennett <steveb@workware.net.au>	2020-06-25 09:01:02 +1000
commit	99b7ce979f72087be74b039aa988a4f25f3b6d27 (patch)
tree	6b23f609b5309e3eea97b49e0eaf729f423a1766 /jim.h
parent	015569f090acda5702eafe6487b534fa76d354d5 (diff)
download	jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.zip jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.tar.gz jimtcl-99b7ce979f72087be74b039aa988a4f25f3b6d27.tar.bz2