aboutsummaryrefslogtreecommitdiff
path: root/jim-regexp.c
diff options
context:
space:
mode:
author: Steve Bennett <steveb@workware.net.au> 2025-08-08 19:57:57 +1000
committer: Steve Bennett <steveb@workware.net.au> 2025-08-08 20:07:50 +1000
commit: eeff606bb7860465b56782ba2ab2847f71963b59 (patch)
tree: 168d8a4cec99629c2f81cb5e8601b6c923f7e6ba /jim-regexp.c
parent: cd31c05cabd79aa1430213c7b9b240f245e6b773 (diff)
downloadjimtcl-regsub-all-match-empty.zip
jimtcl-regsub-all-match-empty.tar.gz
jimtcl-regsub-all-match-empty.tar.bz2
regsub -all: don't loop forever when matching everywhere [branch: regsub-all-match-empty]
Although "" and "x*" both match the empty string, the former correctly exited while the latter looped forever. Match Tcl here by advancing by one char in both cases; the difference is that "x*" also matches at the end of the string, while "" does not. Also prevent both cases from slicing a UTF-8 char into bytes.

Fixes: #353
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- jim-regexp.c 37
1 files changed, 21 insertions, 16 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index d19867e..f47f235 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -485,7 +485,13 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
n = source_len - offset;
p = source_str + offset;
- do {
+ while (1) {
+ /* To match Tcl, an empty pattern does not match at the end
+ * of the string.
+ */
+ if (n == 0 && !pattern[0]) {
+ break;
+ }
int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);
if (match >= REG_BADPAT) {
@@ -584,23 +590,22 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
break;
}
- /* If the pattern is empty, need to step forwards */
- if (pattern[0] == '\0' && n) {
- /* Need to copy the char we are moving over */
- Jim_AppendString(interp, resultObj, p, 1);
- p++;
- n--;
- }
-
+ regexec_flags = 0;
if (pmatch[0].rm_eo == pmatch[0].rm_so) {
- /* The match did not advance the string, so set REG_NOTBOL to force the next match */
- regexec_flags = REG_NOTBOL;
- }
- else {
- regexec_flags = 0;
+ /* Matched a zero length string. Need to avoid matching the same position again */
+ if (pattern[0] == '^') {
+ /* An anchored search sets REG_BOL */
+ regexec_flags = REG_NOTBOL;
+ }
+ else {
+ /* A non-anchored search advances by one char */
+ int charlen = utf8_charlen(p[0]);
+ Jim_AppendString(interp, resultObj, p, charlen);
+ p += charlen;
+ n -= charlen;
+ }
}
-
- } while (n);
+ }
/*
* Copy the portion of the string after the last match to the