From 5802d3f5e49737b761c93fceba1d169e0a26b763 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Wed, 27 Nov 2019 23:09:34 +1100 Subject: string map and string compare now support embedded nulls Reported-by: dbohdan Signed-off-by: Steve Bennett --- jim-tclprefix.c | 30 +++++++++++++++++-- jim.c | 87 +++++++++++++++++-------------------------------------- jim.h | 2 -- tests/string.test | 21 ++++++++++++++ 4 files changed, 76 insertions(+), 64 deletions(-) diff --git a/jim-tclprefix.c b/jim-tclprefix.c index dcffd4d..c492234 100644 --- a/jim-tclprefix.c +++ b/jim-tclprefix.c @@ -28,6 +28,32 @@ static int JimStringCommonLength(const char *str1, int charlen1, const char *str return maxlen; } +/* + * Like Jim_StringCompareObj() except only matches as much as the length of firstObjPtr. + * So "abc" matches "abcdef" but "abcdef" does not match "abc". + */ +int JimStringComparePrefix(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr) +{ + /* We do this the easy way by creating a (possibly) shorter version of secondObjPtr */ + int l1 = Jim_Utf8Length(interp, firstObjPtr); + const char *s2 = Jim_String(secondObjPtr); + int l2 = Jim_Utf8Length(interp, secondObjPtr); + Jim_Obj *objPtr; + int ret; + + if (l2 > l1) { + objPtr = Jim_NewStringObjUtf8(interp, s2, l1); + } + else { + objPtr = secondObjPtr; + } + Jim_IncrRefCount(objPtr); + + ret = Jim_StringCompareObj(interp, firstObjPtr, objPtr, 0); + Jim_DecrRefCount(interp, objPtr); + return ret; +} + /* [tcl::prefix] */ static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *argv) @@ -140,7 +166,7 @@ static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const objPtr = Jim_NewListObj(interp, NULL, 0); for (i = 0; i < listlen; i++) { Jim_Obj *valObj = Jim_ListGetIndex(interp, argv[2], i); - if (Jim_StringCompareLenObj(interp, argv[3], valObj, 0) == 0) { + if (JimStringComparePrefix(interp, argv[3], valObj) == 0) { Jim_ListAppendElement(interp, objPtr, valObj); } } @@ 
-164,7 +190,7 @@ static int Jim_TclPrefixCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const for (i = 0; i < listlen; i++) { Jim_Obj *valObj = Jim_ListGetIndex(interp, argv[2], i); - if (Jim_StringCompareLenObj(interp, stringObj, valObj, 0)) { + if (JimStringComparePrefix(interp, stringObj, valObj)) { /* Does not begin with 'string' */ continue; } diff --git a/jim.c b/jim.c index 499fa69..9ddf4e5 100644 --- a/jim.c +++ b/jim.c @@ -317,54 +317,34 @@ static int JimGlobMatch(const char *pattern, const char *string, int nocase) } /** - * string comparison. Works on binary data. + * utf-8 string comparison. case-insensitive if nocase is set. * * Returns -1, 0 or 1 * - * Note that the lengths are byte lengths, not char lengths. + * Note that the lengths are character lengths, not byte lengths. */ -static int JimStringCompare(const char *s1, int l1, const char *s2, int l2) +static int JimStringCompareUtf8(const char *s1, int l1, const char *s2, int l2, int nocase) { - if (l1 < l2) { - return memcmp(s1, s2, l1) <= 0 ? -1 : 1; - } - else if (l2 < l1) { - return memcmp(s1, s2, l2) >= 0 ? 1 : -1; + int minlen = l1; + if (l2 < l1) { + minlen = l2; } - else { - return JimSign(memcmp(s1, s2, l1)); - } -} - -/** - * Compare null terminated strings, up to a maximum of 'maxchars' characters, - * (or end of string if 'maxchars' is -1). - * - * Returns -1, 0, 1 for s1 < s2, s1 == s2, s1 > s2 respectively. - * - * Note: does not support embedded nulls. 
- */ -static int JimStringCompareLen(const char *s1, const char *s2, int maxchars, int nocase) -{ - while (*s1 && *s2 && maxchars) { + while (minlen) { int c1, c2; s1 += utf8_tounicode_case(s1, &c1, nocase); s2 += utf8_tounicode_case(s2, &c2, nocase); if (c1 != c2) { return JimSign(c1 - c2); } - maxchars--; + minlen--; } - if (!maxchars) { - return 0; + /* Equal to this point, so the shorter string is less */ + if (l1 < l2) { + return -1; } - /* One string or both terminated */ - if (*s1) { + if (l1 > l2) { return 1; } - if (*s2) { - return -1; - } return 0; } @@ -2548,33 +2528,13 @@ int Jim_StringMatchObj(Jim_Interp *interp, Jim_Obj *patternObjPtr, Jim_Obj *objP return JimGlobMatch(Jim_String(patternObjPtr), Jim_String(objPtr), nocase); } -/* - * Note: does not support embedded nulls for the nocase option. - */ int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase) { - int l1, l2; - const char *s1 = Jim_GetString(firstObjPtr, &l1); - const char *s2 = Jim_GetString(secondObjPtr, &l2); - - if (nocase) { - /* Do a character compare for nocase */ - return JimStringCompareLen(s1, s2, -1, nocase); - } - return JimStringCompare(s1, l1, s2, l2); -} - -/** - * Like Jim_StringCompareObj() except compares to a maximum of the length of firstObjPtr. 
- * - * Note: does not support embedded nulls - */ -int Jim_StringCompareLenObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase) -{ const char *s1 = Jim_String(firstObjPtr); + int l1 = Jim_Utf8Length(interp, firstObjPtr); const char *s2 = Jim_String(secondObjPtr); - - return JimStringCompareLen(s1, s2, Jim_Utf8Length(interp, firstObjPtr), nocase); + int l2 = Jim_Utf8Length(interp, secondObjPtr); + return JimStringCompareUtf8(s1, l1, s2, l2, nocase); } /* Convert a range, as returned by Jim_GetRange(), into @@ -13353,7 +13313,7 @@ static Jim_Obj *JimStringMap(Jim_Interp *interp, Jim_Obj *mapListObjPtr, if (strLen >= kl && kl) { int rc; - rc = JimStringCompareLen(str, k, kl, nocase); + rc = JimStringCompareUtf8(str, kl, k, kl, nocase); if (rc == 0) { if (noMatchStart) { Jim_AppendString(interp, resultObjPtr, noMatchStart, str - noMatchStart); @@ -13487,12 +13447,19 @@ badcompareargs: Jim_SetResultBool(interp, Jim_StringEqObj(argv[0], argv[1])); } else { + const char *s1 = Jim_String(argv[0]); + int l1 = Jim_Utf8Length(interp, argv[0]); + const char *s2 = Jim_String(argv[1]); + int l2 = Jim_Utf8Length(interp, argv[1]); if (opt_length >= 0) { - n = JimStringCompareLen(Jim_String(argv[0]), Jim_String(argv[1]), opt_length, !opt_case); - } - else { - n = Jim_StringCompareObj(interp, argv[0], argv[1], !opt_case); + if (l1 > opt_length) { + l1 = opt_length; + } + if (l2 > opt_length) { + l2 = opt_length; + } } + n = JimStringCompareUtf8(s1, l1, s2, l2, !opt_case); Jim_SetResultInt(interp, option == OPT_COMPARE ? 
n : n == 0); } return JIM_OK; diff --git a/jim.h b/jim.h index bbef251..f6d5f70 100644 --- a/jim.h +++ b/jim.h @@ -699,8 +699,6 @@ JIM_EXPORT int Jim_CompareStringImmediate (Jim_Interp *interp, Jim_Obj *objPtr, const char *str); JIM_EXPORT int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase); -JIM_EXPORT int Jim_StringCompareLenObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, - Jim_Obj *secondObjPtr, int nocase); JIM_EXPORT int Jim_Utf8Length(Jim_Interp *interp, Jim_Obj *objPtr); /* reference object */ diff --git a/tests/string.test b/tests/string.test index b095f60..48451e4 100644 --- a/tests/string.test +++ b/tests/string.test @@ -116,6 +116,15 @@ test string-2.28 {string equal with length, unequal strings} { test string-2.29 {string equal with length, unequal strings} { string compare -length 2 ab abde } 0 +test string-2.30 {string compare - bytes vs chars} { + string compare abcd\u1000\u1100\u1200x abcd\u1000\u1100\u1200y +} -1 +test string-2.31 {string compare - embedded nulls} { + string compare ab\0ghi0 ab\0ghi1 +} -1 +test string-2.32 {string compare - embedded nulls, nocase} { + string compare -nocase ab\0ghi0 AB\0GHi1 +} -1 # only need a few tests on equal, since it uses the same code as # string compare, but just modifies the return output test string-3.1 {string equal} { @@ -521,6 +530,18 @@ test string-10.16 {string map, one pair case} { test string-10.17 {string map, one pair case} { string map {Ab 4321} aAbCaBaAbAbcAb } {a4321CaBa43214321c4321} +test string-10.18 {string map, nulls in string} { + string map {a bc} ade\0ghia\0jkl +} "bcde\0ghibc\0jkl" +test string-10.19 {string map, nulls in map source} { + string map {\0 bc} ade\0ghia\0jkl +} "adebcghiabcjkl" +test string-10.20 {string map, nulls in map dest} { + string map {a A\0A} adeghiajkl +} "A\0AdeghiA\0Ajkl" +test string-10.21 {string map, null bytes} { + string map "\u0000afternull #" foo\u0000afternull\u0000123456789bar +} foo#\u0000123456789bar 
test string-11.1 {string match, too few args} { list [catch {string match a} msg] -- cgit v1.1