diff options
author | Steve Bennett <steveb@workware.net.au> | 2025-08-08 19:57:57 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2025-08-08 20:07:50 +1000 |
commit | eeff606bb7860465b56782ba2ab2847f71963b59 (patch) | |
tree | 168d8a4cec99629c2f81cb5e8601b6c923f7e6ba /jim-regexp.c | |
parent | cd31c05cabd79aa1430213c7b9b240f245e6b773 (diff) | |
download | jimtcl-regsub-all-match-empty.zip jimtcl-regsub-all-match-empty.tar.gz jimtcl-regsub-all-match-empty.tar.bz2 |
regsub -all: don't loop forever when matching everywhere (branch: regsub-all-match-empty)
Although "" and "x*" both match the empty string, regsub -all with the
former correctly exited, while with the latter it looped forever.
Match Tcl here by advancing by one char after an empty match in both
cases. The difference is that "x*" also matches at the end of the
string, while "" does not.
Also prevent both cases from slicing a utf-8 char into bytes.
Fixes: #353
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- | jim-regexp.c | 37 |
1 files changed, 21 insertions, 16 deletions
diff --git a/jim-regexp.c b/jim-regexp.c index d19867e..f47f235 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -485,7 +485,13 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) n = source_len - offset; p = source_str + offset; - do { + while (1) { + /* To match Tcl, an empty pattern does not match at the end + * of the string. + */ + if (n == 0 && !pattern[0]) { + break; + } int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags); if (match >= REG_BADPAT) { @@ -584,23 +590,22 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) break; } - /* If the pattern is empty, need to step forwards */ - if (pattern[0] == '\0' && n) { - /* Need to copy the char we are moving over */ - Jim_AppendString(interp, resultObj, p, 1); - p++; - n--; - } - + regexec_flags = 0; if (pmatch[0].rm_eo == pmatch[0].rm_so) { - /* The match did not advance the string, so set REG_NOTBOL to force the next match */ - regexec_flags = REG_NOTBOL; - } - else { - regexec_flags = 0; + /* Matched a zero length string. Need to avoid matching the same position again */ + if (pattern[0] == '^') { + /* An anchored search sets REG_BOL */ + regexec_flags = REG_NOTBOL; + } + else { + /* A non-anchored search advances by one char */ + int charlen = utf8_charlen(p[0]); + Jim_AppendString(interp, resultObj, p, charlen); + p += charlen; + n -= charlen; + } } - - } while (n); + } /* * Copy the portion of the string after the last match to the |