diff options
author | Steve Bennett <steveb@workware.net.au> | 2020-06-01 08:47:13 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2020-06-05 21:13:11 +1000 |
commit | 7dbb01f6ca673f3b46231215695c848ffbee3989 (patch) | |
tree | 01dde8330cd75ed62fb5b29132bd5d0009310a1a | |
parent | d4cd7cabc6359bf5e1af8ce0219e621ae0ec3d86 (diff) | |
download | jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.zip jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.gz jimtcl-7dbb01f6ca673f3b46231215695c848ffbee3989.tar.bz2 |
core: string match and other glob matching support embedded nulls
string match, switch -glob, info commands, etc. all now support
patterns and strings with embedded nulls.
Fixes #143
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | jim.c | 85 | ||||
-rw-r--r-- | tests/string.test | 11 | ||||
-rw-r--r-- | tests/stringmatch.test | 12 |
3 files changed, 79 insertions, 29 deletions
@@ -188,12 +188,13 @@ static int utf8_tounicode_case(const char *s, int *uc, int upper) * * Returns NULL on no match. */ -static const char *JimCharsetMatch(const char *pattern, int c, int flags) +static const char *JimCharsetMatch(const char *pattern, int plen, int c, int flags) { int not = 0; int pchar; int match = 0; int nocase = 0; + int n; if (flags & JIM_NOCASE) { nocase++; @@ -204,6 +205,7 @@ static const char *JimCharsetMatch(const char *pattern, int c, int flags) if (*pattern == '^') { not++; pattern++; + plen--; } /* Special case. If the first char is ']', it is part of the set */ @@ -212,22 +214,27 @@ static const char *JimCharsetMatch(const char *pattern, int c, int flags) } } - while (*pattern && *pattern != ']') { + while (plen && *pattern != ']') { /* Exact match */ if (pattern[0] == '\\') { first: - pattern += utf8_tounicode_case(pattern, &pchar, nocase); + n = utf8_tounicode_case(pattern, &pchar, nocase); + pattern += n; + plen -= n; } else { /* Is this a range? a-z */ int start; int end; - pattern += utf8_tounicode_case(pattern, &start, nocase); - if (pattern[0] == '-' && pattern[1]) { + n = utf8_tounicode_case(pattern, &start, nocase); + pattern += n; + plen -= n; + if (pattern[0] == '-' && plen > 1) { /* skip '-' */ - pattern++; - pattern += utf8_tounicode_case(pattern, &end, nocase); + n = 1 + utf8_tounicode_case(pattern + 1, &end, nocase); + pattern += n; + plen -= n; /* Handle reversed range too */ if ((c >= start && c <= end) || (c >= end && c <= start)) { @@ -253,39 +260,52 @@ first: /* Note: string *must* be valid UTF-8 sequences */ -static int JimGlobMatch(const char *pattern, const char *string, int nocase) +static int JimGlobMatch(const char *pattern, int plen, const char *string, int slen, int nocase) { int c; int pchar; - while (*pattern) { + int n; + const char *p; + while (plen) { switch (pattern[0]) { case '*': - while (pattern[1] == '*') { + while (pattern[1] == '*' && plen) { pattern++; + plen--; } pattern++; - if (!pattern[0]) { + plen--; + if (!plen) { return 1; /* match */ } - while (*string) { + while (slen) { /* Recursive call - Does the remaining pattern match anywhere? */ - if (JimGlobMatch(pattern, string, nocase)) + if (JimGlobMatch(pattern, plen, string, slen, nocase)) return 1; /* match */ - string += utf8_tounicode(string, &c); + n = utf8_tounicode(string, &c); + string += n; + slen -= n; } return 0; /* no match */ case '?': - string += utf8_tounicode(string, &c); + n = utf8_tounicode(string, &c); + string += n; + slen -= n; break; case '[': { - string += utf8_tounicode(string, &c); - pattern = JimCharsetMatch(pattern + 1, c, nocase ? JIM_NOCASE : 0); - if (!pattern) { + n = utf8_tounicode(string, &c); + string += n; + slen -= n; + p = JimCharsetMatch(pattern + 1, plen - 1, c, nocase ? JIM_NOCASE : 0); + if (!p) { return 0; } - if (!*pattern) { + plen -= p - pattern; + pattern = p; + + if (!plen) { /* Ran out of pattern (no ']') */ continue; } @@ -294,25 +314,31 @@ static int JimGlobMatch(const char *pattern, const char *string, int nocase) case '\\': if (pattern[1]) { pattern++; + plen--; } /* fall through */ default: - string += utf8_tounicode_case(string, &c, nocase); + n = utf8_tounicode_case(string, &c, nocase); + string += n; + slen -= n; utf8_tounicode_case(pattern, &pchar, nocase); if (pchar != c) { return 0; } break; } - pattern += utf8_tounicode_case(pattern, &pchar, nocase); - if (!*string) { - while (*pattern == '*') { + n = utf8_tounicode_case(pattern, &pchar, nocase); + pattern += n; + plen -= n; + if (!slen) { + while (*pattern == '*' && plen) { pattern++; + plen--; } break; } } - if (!*pattern && !*string) { + if (!plen && !slen) { return 1; } return 0; @@ -2539,7 +2565,10 @@ int Jim_StringEqObj(Jim_Obj *aObjPtr, Jim_Obj *bObjPtr) */ int Jim_StringMatchObj(Jim_Interp *interp, Jim_Obj *patternObjPtr, Jim_Obj *objPtr, int nocase) { - return JimGlobMatch(Jim_String(patternObjPtr), Jim_String(objPtr), nocase); + int plen, slen; + const char *pattern = Jim_GetString(patternObjPtr, &plen); + const char *string = Jim_GetString(objPtr, &slen); + return JimGlobMatch(pattern, plen, string, slen, nocase); } int Jim_StringCompareObj(Jim_Interp *interp, Jim_Obj *firstObjPtr, Jim_Obj *secondObjPtr, int nocase) @@ -9699,7 +9728,7 @@ static Jim_Obj *JimScanAString(Jim_Interp *interp, const char *sdescr, const cha break; /* EOS via WS if unspecified */ n = utf8_tounicode(str, &c); - if (sdescr && !JimCharsetMatch(sdescr, c, JIM_CHARSET_SCAN)) + if (sdescr && !JimCharsetMatch(sdescr, strlen(sdescr), c, JIM_CHARSET_SCAN)) break; while (n--) *p++ = *str++; @@ -11217,7 +11246,9 @@ static Jim_Obj *JimHashtablePatternMatch(Jim_Interp *interp, Jim_HashTable *ht, nomatch = !Jim_StringMatchObj(interp, patternObjPtr, he->key, 0); } else { - nomatch = !JimGlobMatch(Jim_String(patternObjPtr), he->key, 0); + int plen; + const char *pattern = Jim_GetString(patternObjPtr, &plen); + nomatch = !JimGlobMatch(pattern, plen, he->key, strlen(he->key), 0); } } if (!nomatch) { diff --git a/tests/string.test b/tests/string.test index e9d5399..5a22229 100644 --- a/tests/string.test +++ b/tests/string.test @@ -696,6 +696,17 @@ test string-11.50 {string match, *special case} tcl { string match "\\" "\\" } 0 +test string-11.51 {string match, nulls in pattern} { + string match "abc\0def" "abc\0def" +} 1 + +test string-11.52 {string match, nulls in pattern} { + string match "abc*\0def" "abcghi\0def" +} 1 + +test string-11.53 {string match, nulls in pattern} { + string match "abc\[ghi\0]def" "abc\0def" +} 1 test string-12.1 {string range} { list [catch {string range} msg] diff --git a/tests/stringmatch.test b/tests/stringmatch.test index 7fe3fcc..f0eab2a 100644 --- a/tests/stringmatch.test +++ b/tests/stringmatch.test @@ -214,12 +214,20 @@ test stringmatch-6.6 {charset with ^} { string match {a[\]]c} {a]c} } 0 -test stringmatch=7.1 {short string with ?} { +test stringmatch-7.1 {short string with ?} { string match {ab?} ab } 0 -test stringmatch=7.1 {multiple * to end} { +test stringmatch-7.2 {multiple * to end} { string match {ab**} ab } 1 +test stringmatch-7.3 {null in string} { + string match *bar* foo\0bar +} 1 + +test stringmatch-7.4 {null in pattern} { + string match *b\[\0a\]r* foobar +} 1 + testreport |