core: string match and other glob matching support embedded nulls

string match, switch -glob, info commands, etc. all now support patterns and strings with embedded nulls.

Fixes #143

Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2020-06-01 08:47:13 +1000
committer: Steve Bennett <steveb@workware.net.au> 2020-06-05 21:13:11 +1000
commit: 7dbb01f6ca673f3b46231215695c848ffbee3989 (patch)
tree: 01dde8330cd75ed62fb5b29132bd5d0009310a1a
parent: d4cd7cabc6359bf5e1af8ce0219e621ae0ec3d86 (diff)
download: jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.zip
jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.gz
jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.bz2
3 files changed, 79 insertions, 29 deletions
diff --git a/jim.c b/jim.c
index 432f55a..79f5ff7 100644
--- a/jim.c
+++ b/jim.c
@@ -188,12 +188,13 @@ static int utf8_tounicode_case(const char *s, int *uc, int upper)
  *
  * Returns NULL on no match.
  */
-static const char *JimCharsetMatch(const char *pattern, int c, int flags)
+static const char *JimCharsetMatch(const char *pattern, int plen, int c, int flags)
 {
     int not = 0;
     int pchar;
     int match = 0;
     int nocase = 0;
+    int n;
 
     if (flags & JIM_NOCASE) {
         nocase++;
@@ -204,6 +205,7 @@ static const char *JimCharsetMatch(const char *pattern, int c, int flags)
         if (*pattern == '^') {
             not++;
             pattern++;
+            plen--;
         }
 
         /* Special case. If the first char is ']', it is part of the set */
@@ -212,22 +214,27 @@ static const char *JimCharsetMatch(const char *pattern, int c, int flags)
         }
     }
 
-    while (*pattern && *pattern != ']') {
+    while (plen && *pattern != ']') {
         /* Exact match */
         if (pattern[0] == '\\') {
 first:
-            pattern += utf8_tounicode_case(pattern, &pchar, nocase);
+            n = utf8_tounicode_case(pattern, &pchar, nocase);
+            pattern += n;
+            plen -= n;
         }
         else {
             /* Is this a range? a-z */
             int start;
             int end;
 
-            pattern += utf8_tounicode_case(pattern, &start, nocase);
-            if (pattern[0] == '-' && pattern[1]) {
+            n = utf8_tounicode_case(pattern, &start, nocase);
+            pattern += n;
+            plen -= n;
+            if (pattern[0] == '-' && plen > 1) {
                 /* skip '-' */
-                pattern++;
-                pattern += utf8_tounicode_case(pattern, &end, nocase);
+                n = 1 + utf8_tounicode_case(pattern + 1, &end, nocase);
+                pattern += n;
+                plen -= n;
 
                 /* Handle reversed range too */
                 if ((c >= start && c <= end) || (c >= end && c <= start)) {
@@ -253,39 +260,52 @@ first:
 
 /* Note: string *must* be valid UTF-8 sequences
  */
-static int JimGlobMatch(const char *pattern, const char *string, int nocase)
+static int JimGlobMatch(const char *pattern, int plen, const char *string, int slen, int nocase)
 {
     int c;
     int pchar;
-    while (*pattern) {
+    int n;
+    const char *p;
+    while (plen) {
         switch (pattern[0]) {
             case '*':
-                while (pattern[1] == '*') {
+                while (pattern[1] == '*' && plen) {
                     pattern++;
+                    plen--;
                 }
                 pattern++;
-                if (!pattern[0]) {
+                plen--;
+                if (!plen) {
                     return 1;   /* match */
                 }
-                while (*string) {
+                while (slen) {
                     /* Recursive call - Does the remaining pattern match anywhere? */
-                    if (JimGlobMatch(pattern, string, nocase))
+                    if (JimGlobMatch(pattern, plen, string, slen, nocase))
                         return 1;       /* match */
-                    string += utf8_tounicode(string, &c);
+                    n = utf8_tounicode(string, &c);
+                    string += n;
+                    slen -= n;
                 }
                 return 0;       /* no match */
 
             case '?':
-                string += utf8_tounicode(string, &c);
+                n = utf8_tounicode(string, &c);
+                string += n;
+                slen -= n;
                 break;
 
             case '[': {
-                    string += utf8_tounicode(string, &c);
-                    pattern = JimCharsetMatch(pattern + 1, c, nocase ? JIM_NOCASE : 0);
-                    if (!pattern) {
+                    n = utf8_tounicode(string, &c);
+                    string += n;
+                    slen -= n;
+                    p = JimCharsetMatch(pattern + 1, plen - 1, c, nocase ? JIM_NOCASE : 0);
+                    if (!p) {
                         return 0;
                     }
-                    if (!*pattern) {
+                    plen -= p - pattern;
+                    pattern = p;
+
+                    if (!plen) {
                         /* Ran out of pattern (no ']') */
                         continue;
                     }
@@ -294,25 +314,31 @@ static int JimGlobMatch(const char *pattern, const char *string, int nocase)
             case '\\':
                 if (pattern[1]) {
                     pattern++;
+                    plen--;
                 }
                 /* fall through */
             default:
-                string += utf8_tounicode_case(string, &c, nocase);
+                n = utf8_tounicode_case(string, &c, nocase);
+                string += n;
+                slen -= n;
                 utf8_tounicode_case(pattern, &pchar, nocase);
                 if (pchar != c) {
                     return 0;
                 }
                 break;
         }
-        pattern += utf8_tounicode_case(pattern, &pchar, nocase);
-        if (!*string) {
-            while (*pattern == '*') {
+        n = utf8_tounicode_case(pattern, &pchar, nocase);
+        pattern += n;
+        plen -= n;
+        if (!slen) {
+            while (*pattern == '*' && plen) {
                 pattern++;
+                plen--;
             }
             break;
         }
     }
-    if (!*pattern && !*string) {
+    if (!plen && !slen) {
         return 1;
     }
     return 0;
@@ -2539,7 +2565,10 @@ int Jim_StringEqObj(Jim_Obj *aObjPtr, Jim_Obj *bObjPtr)
  */
 int Jim_StringMatchObj(Jim_Interp *interp, Jim_Obj *patternObjPtr, Jim_Obj *objPtr, int nocase)
 {
-    return JimGlobMatch(Jim_String(patternObjPtr), Jim_String(objPtr), nocase);
+    int plen, slen;
+    const char *pattern = Jim_GetString(patternObjPtr, &plen);
+    const char *string = Jim_GetString(objPtr, &slen);
+    return JimGlobMatch(pattern, plen, string, slen, nocase);
 }
 
 int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase)
@@ -9699,7 +9728,7 @@ static Jim_Obj *JimScanAString(Jim_Interp *interp, const char *sdescr, const cha
             break;              /* EOS via WS if unspecified */
 
         n = utf8_tounicode(str, &c);
-        if (sdescr && !JimCharsetMatch(sdescr, c, JIM_CHARSET_SCAN))
+        if (sdescr && !JimCharsetMatch(sdescr, strlen(sdescr), c, JIM_CHARSET_SCAN))
             break;
         while (n--)
             *p++ = *str++;
@@ -11217,7 +11246,9 @@ static Jim_Obj *JimHashtablePatternMatch(Jim_Interp *interp, Jim_HashTable *ht,
                     nomatch = !Jim_StringMatchObj(interp, patternObjPtr, he->key, 0);
                 }
                 else {
-                    nomatch = !JimGlobMatch(Jim_String(patternObjPtr), he->key, 0);
+                    int plen;
+                    const char *pattern = Jim_GetString(patternObjPtr, &plen);
+                    nomatch = !JimGlobMatch(pattern, plen, he->key, strlen(he->key), 0);
                 }
             }
             if (!nomatch) {
diff --git a/tests/string.test b/tests/string.test
index e9d5399..5a22229 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -696,6 +696,17 @@ test string-11.50 {string match, *special case} tcl {
     string match "\\" "\\"
 } 0
 
+test string-11.51 {string match, nulls in pattern} {
+    string match "abc\0def" "abc\0def"
+} 1
+
+test string-11.52 {string match, nulls in pattern} {
+    string match "abc*\0def" "abcghi\0def"
+} 1
+
+test string-11.53 {string match, nulls in pattern} {
+    string match "abc\[ghi\0]def" "abc\0def"
+} 1
 
 test string-12.1 {string range} {
     list [catch {string range} msg]
diff --git a/tests/stringmatch.test b/tests/stringmatch.test
index 7fe3fcc..f0eab2a 100644
--- a/tests/stringmatch.test
+++ b/tests/stringmatch.test
@@ -214,12 +214,20 @@ test stringmatch-6.6 {charset with ^} {
     string match {a[\]]c} {a]c}
 } 0
 
-test stringmatch=7.1 {short string with ?} {
+test stringmatch-7.1 {short string with ?} {
     string match {ab?} ab
 } 0
 
-test stringmatch=7.1 {multiple * to end} {
+test stringmatch-7.2 {multiple * to end} {
     string match {ab**} ab
 } 1
 
+test stringmatch-7.3 {null in string} {
+    string match *bar* foo\0bar
+} 1
+
+test stringmatch-7.4 {null in pattern} {
+    string match *b\[\0a\]r* foobar
+} 1
+
 testreport
author	Steve Bennett <steveb@workware.net.au>	2020-06-01 08:47:13 +1000
committer	Steve Bennett <steveb@workware.net.au>	2020-06-05 21:13:11 +1000
commit	7dbb01f6ca673f3b46231215695c848ffbee3989 (patch)
tree	01dde8330cd75ed62fb5b29132bd5d0009310a1a
parent	d4cd7cabc6359bf5e1af8ce0219e621ae0ec3d86 (diff)
download	jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.zip jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.gz jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.bz2