aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--jim-regexp.c37
-rw-r--r--tests/regexp.test10
-rw-r--r--tests/regexp2.test6
3 files changed, 37 insertions, 16 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index d19867e..f47f235 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -485,7 +485,13 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
n = source_len - offset;
p = source_str + offset;
- do {
+ while (1) {
+ /* To match Tcl, an empty pattern does not match at the end
+ * of the string.
+ */
+ if (n == 0 && !pattern[0]) {
+ break;
+ }
int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);
if (match >= REG_BADPAT) {
@@ -584,23 +590,22 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
break;
}
- /* If the pattern is empty, need to step forwards */
- if (pattern[0] == '\0' && n) {
- /* Need to copy the char we are moving over */
- Jim_AppendString(interp, resultObj, p, 1);
- p++;
- n--;
- }
-
+ regexec_flags = 0;
if (pmatch[0].rm_eo == pmatch[0].rm_so) {
- /* The match did not advance the string, so set REG_NOTBOL to force the next match */
- regexec_flags = REG_NOTBOL;
- }
- else {
- regexec_flags = 0;
+ /* Matched a zero length string. Need to avoid matching the same position again */
+ if (pattern[0] == '^') {
+ /* An anchored search sets REG_NOTBOL so that '^' cannot match at this position again */
+ regexec_flags = REG_NOTBOL;
+ }
+ else {
+ /* A non-anchored search advances by one char */
+ int charlen = utf8_charlen(p[0]);
+ Jim_AppendString(interp, resultObj, p, charlen);
+ p += charlen;
+ n -= charlen;
+ }
}
-
- } while (n);
+ }
/*
* Copy the portion of the string after the last match to the
diff --git a/tests/regexp.test b/tests/regexp.test
index f7c589d..0d86d6b 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -666,6 +666,16 @@ test regexp-21.15 {Replace literal backslash} {
set value
} "\\abc\\def"
+test regexp-21.16 {Replace nothing} {
+ regsub -all {x*} anything !
+} {!a!n!y!t!h!i!n!g!}
+
+test regexp-21.17 {Replace nothing via empty pattern} {
+ # Interestingly in this case Tcl does not match
+ # at end of string while the previous case does
+ regsub -all {} anything !
+} {!a!n!y!t!h!i!n!g}
+
test regexp-22.1 {char range} {
regexp -all -inline {[a-c]+} "defaaghbcadfbaacccd"
} {aa bca baaccc}
diff --git a/tests/regexp2.test b/tests/regexp2.test
index 4d915c2..3f357b6 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -810,6 +810,12 @@ test regexpComp-21.10 {regexp command compiling tests} {
# list [regsub -all "" "" bar str] $str
# }
#} {0 {}}
+test regexpComp-21.12 {regexp empty pattern with utf8} utf8 {
+ # Make sure the second char isn't sliced up
+ evalInProc {
+ regsub -all "" a\u0442bc !
+ }
+} "!a!\u0442!b!c"
# We can forgive the underlying regexp engine for not supporting this.
# Why not use this instead? "((^X)*|\$)"