4 files changed, 76 insertions, 64 deletions
diff --git a/jim-tclprefix.c b/jim-tclprefix.c
index dcffd4d..c492234 100644
--- a/jim-tclprefix.c
+++ b/jim-tclprefix.c
@@ -28,6 +28,32 @@ static int JimStringCommonLength(const char *str1, int charlen1, const char *str
     return maxlen;
 }
 
+/*
+ * Like Jim_StringCompareObj() except only compares as many chars as firstObjPtr has.
+ * So "abc" matches "abcdef" but "abcdef" does not match "abc". Returns 0 on a match.
+ */
+static int JimStringComparePrefix(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr)
+{
+    /* The easy way: compare against a (possibly) truncated copy of secondObjPtr */
+    int l1 = Jim_Utf8Length(interp, firstObjPtr);
+    const char *s2 = Jim_String(secondObjPtr);
+    int l2 = Jim_Utf8Length(interp, secondObjPtr);
+    Jim_Obj *objPtr;
+    int ret;
+
+    if (l2 > l1) {
+        objPtr = Jim_NewStringObjUtf8(interp, s2, l1); /* first l1 chars of secondObjPtr */
+    }
+    else {
+        objPtr = secondObjPtr; /* already no longer than firstObjPtr */
+    }
+    Jim_IncrRefCount(objPtr); /* paired with the decrement below on both paths */
+
+    ret = Jim_StringCompareObj(interp, firstObjPtr, objPtr, 0);
+    Jim_DecrRefCount(interp, objPtr);
+    return ret;
+}
+
 /* [tcl::prefix]
  */
 static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
@@ -140,7 +166,7 @@ static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const
                 objPtr = Jim_NewListObj(interp, NULL, 0);
                 for (i = 0; i < listlen; i++) {
                     Jim_Obj *valObj = Jim_ListGetIndex(interp, argv[2], i);
-                    if (Jim_StringCompareLenObj(interp, argv[3], valObj, 0) == 0) {
+                    if (JimStringComparePrefix(interp, argv[3], valObj) == 0) {
                         Jim_ListAppendElement(interp, objPtr, valObj);
                     }
                 }
@@ -164,7 +190,7 @@ static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const
                 for (i = 0; i < listlen; i++) {
                     Jim_Obj *valObj = Jim_ListGetIndex(interp, argv[2], i);
 
-                    if (Jim_StringCompareLenObj(interp, stringObj, valObj, 0)) {
+                    if (JimStringComparePrefix(interp, stringObj, valObj)) {
                         /* Does not begin with 'string' */
                         continue;
                     }
diff --git a/jim.c b/jim.c
index 499fa69..9ddf4e5 100644
--- a/jim.c
+++ b/jim.c
@@ -317,54 +317,34 @@ static int JimGlobMatch(const char *pattern, const char *string, int nocase)
 }
 
 /**
- * string comparison. Works on binary data.
+ * utf-8 string comparison. case-insensitive if nocase is set.
  *
  * Returns -1, 0 or 1
  *
- * Note that the lengths are byte lengths, not char lengths.
+ * Note that the lengths are character lengths, not byte lengths.
  */
-static int JimStringCompare(const char *s1, int l1, const char *s2, int l2)
+static int JimStringCompareUtf8(const char *s1, int l1, const char *s2, int l2, int nocase)
 {
-    if (l1 < l2) {
-        return memcmp(s1, s2, l1) <= 0 ? -1 : 1;
-    }
-    else if (l2 < l1) {
-        return memcmp(s1, s2, l2) >= 0 ? 1 : -1;
+    int minlen = l1;
+    if (l2 < l1) {
+        minlen = l2;
     }
-    else {
-        return JimSign(memcmp(s1, s2, l1));
-    }
-}
-
-/**
- * Compare null terminated strings, up to a maximum of 'maxchars' characters,
- * (or end of string if 'maxchars' is -1).
- *
- * Returns -1, 0, 1 for s1 < s2, s1 == s2, s1 > s2 respectively.
- *
- * Note: does not support embedded nulls.
- */
-static int JimStringCompareLen(const char *s1, const char *s2, int maxchars, int nocase)
-{
-    while (*s1 && *s2 && maxchars) {
+    while (minlen) {
         int c1, c2;
         s1 += utf8_tounicode_case(s1, &c1, nocase);
         s2 += utf8_tounicode_case(s2, &c2, nocase);
         if (c1 != c2) {
             return JimSign(c1 - c2);
         }
-        maxchars--;
+        minlen--;
     }
-    if (!maxchars) {
-        return 0;
+    /* Equal to this point, so the shorter string is less */
+    if (l1 < l2) {
+        return -1;
     }
-    /* One string or both terminated */
-    if (*s1) {
+    if (l1 > l2) {
         return 1;
     }
-    if (*s2) {
-        return -1;
-    }
     return 0;
 }
 
@@ -2548,33 +2528,13 @@ int Jim_StringMatchObj(Jim_Interp *interp, Jim_Obj *patternObjPtr, Jim_Obj *objP
     return JimGlobMatch(Jim_String(patternObjPtr), Jim_String(objPtr), nocase);
 }
 
-/*
- * Note: does not support embedded nulls for the nocase option.
- */
 int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase)
 {
-    int l1, l2;
-    const char *s1 = Jim_GetString(firstObjPtr, &l1);
-    const char *s2 = Jim_GetString(secondObjPtr, &l2);
-
-    if (nocase) {
-        /* Do a character compare for nocase */
-        return JimStringCompareLen(s1, s2, -1, nocase);
-    }
-    return JimStringCompare(s1, l1, s2, l2);
-}
-
-/**
- * Like Jim_StringCompareObj() except compares to a maximum of the length of firstObjPtr.
- *
- * Note: does not support embedded nulls
- */
-int Jim_StringCompareLenObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase)
-{
     const char *s1 = Jim_String(firstObjPtr);
+    int l1 = Jim_Utf8Length(interp, firstObjPtr);
     const char *s2 = Jim_String(secondObjPtr);
-
-    return JimStringCompareLen(s1, s2, Jim_Utf8Length(interp, firstObjPtr), nocase);
+    int l2 = Jim_Utf8Length(interp, secondObjPtr);
+    return JimStringCompareUtf8(s1, l1, s2, l2, nocase);
 }
 
 /* Convert a range, as returned by Jim_GetRange(), into
@@ -13353,7 +13313,7 @@ static Jim_Obj *JimStringMap(Jim_Interp *interp, Jim_Obj *mapListObjPtr,
 
             if (strLen >= kl && kl) {
                 int rc;
-                rc = JimStringCompareLen(str, k, kl, nocase);
+                rc = JimStringCompareUtf8(str, kl, k, kl, nocase);
                 if (rc == 0) {
                     if (noMatchStart) {
                         Jim_AppendString(interp, resultObjPtr, noMatchStart, str - noMatchStart);
@@ -13487,12 +13447,19 @@ badcompareargs:
                     Jim_SetResultBool(interp, Jim_StringEqObj(argv[0], argv[1]));
                 }
                 else {
+                    const char *s1 = Jim_String(argv[0]);
+                    int l1 = Jim_Utf8Length(interp, argv[0]);
+                    const char *s2 = Jim_String(argv[1]);
+                    int l2 = Jim_Utf8Length(interp, argv[1]);
                     if (opt_length >= 0) {
-                        n = JimStringCompareLen(Jim_String(argv[0]), Jim_String(argv[1]), opt_length, !opt_case);
-                    }
-                    else {
-                        n = Jim_StringCompareObj(interp, argv[0], argv[1], !opt_case);
+                        if (l1 > opt_length) {
+                            l1 = opt_length;
+                        }
+                        if (l2 > opt_length) {
+                            l2 = opt_length;
+                        }
                     }
+                    n = JimStringCompareUtf8(s1, l1, s2, l2, !opt_case);
                     Jim_SetResultInt(interp, option == OPT_COMPARE ? n : n == 0);
                 }
                 return JIM_OK;
diff --git a/jim.h b/jim.h
index bbef251..f6d5f70 100644
--- a/jim.h
+++ b/jim.h
@@ -699,8 +699,6 @@ JIM_EXPORT int Jim_CompareStringImmediate (Jim_Interp *interp,
         Jim_Obj *objPtr, const char *str);
 JIM_EXPORT int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr,
         Jim_Obj *secondObjPtr, int nocase);
-JIM_EXPORT int Jim_StringCompareLenObj(Jim_Interp *interp, Jim_Obj *firstObjPtr,
-        Jim_Obj *secondObjPtr, int nocase);
 JIM_EXPORT int Jim_Utf8Length(Jim_Interp *interp, Jim_Obj *objPtr);
 
 /* reference object */
diff --git a/tests/string.test b/tests/string.test
index b095f60..48451e4 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -116,6 +116,15 @@ test string-2.28 {string equal with length, unequal strings} {
 test string-2.29 {string equal with length, unequal strings} {
     string compare -length 2 ab abde
 } 0
+test string-2.30 {string compare - bytes vs chars} {
+    string compare abcd\u1000\u1100\u1200x abcd\u1000\u1100\u1200y
+} -1
+test string-2.31 {string compare - embedded nulls} {
+    string compare ab\0ghi0 ab\0ghi1
+} -1
+test string-2.32 {string compare - embedded nulls, nocase} {
+    string compare -nocase ab\0ghi0 AB\0GHi1
+} -1
 # only need a few tests on equal, since it uses the same code as
 # string compare, but just modifies the return output
 test string-3.1 {string equal} {
@@ -521,6 +530,18 @@ test string-10.16 {string map, one pair case} {
 test string-10.17 {string map, one pair case} {
     string map {Ab 4321} aAbCaBaAbAbcAb
 } {a4321CaBa43214321c4321}
+test string-10.18 {string map, nulls in string} {
+    string map {a bc} ade\0ghia\0jkl
+} "bcde\0ghibc\0jkl"
+test string-10.19 {string map, nulls in map source} {
+    string map {\0 bc} ade\0ghia\0jkl
+} "adebcghiabcjkl"
+test string-10.20 {string map, nulls in map dest} {
+    string map {a A\0A} adeghiajkl
+} "A\0AdeghiA\0Ajkl"
+test string-10.21 {string map, null bytes} {
+    string map "\u0000afternull #" foo\u0000afternull\u0000123456789bar
+} foo#\u0000123456789bar
 
 test string-11.1 {string match, too few args} {
     list [catch {string match a} msg]