3 files changed, 37 insertions, 16 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index d19867e..f47f235 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -485,7 +485,13 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
 
     n = source_len - offset;
     p = source_str + offset;
-    do {
+    while (1) {
+        /* To match Tcl, an empty pattern does not match at the end
+         * of the string.
+         */
+        if (n == 0 && !pattern[0]) {
+            break;
+        }
         int match = jim_regexec(regex, p, MAX_SUB_MATCHES, pmatch, regexec_flags);
 
         if (match >= REG_BADPAT) {
@@ -584,23 +590,22 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
             break;
         }
 
-        /* If the pattern is empty, need to step forwards */
-        if (pattern[0] == '\0' && n) {
-            /* Need to copy the char we are moving over */
-            Jim_AppendString(interp, resultObj, p, 1);
-            p++;
-            n--;
-        }
-
+        regexec_flags = 0;
         if (pmatch[0].rm_eo == pmatch[0].rm_so) {
-            /* The match did not advance the string, so set REG_NOTBOL to force the next match */
-            regexec_flags = REG_NOTBOL;
-        }
-        else {
-            regexec_flags = 0;
+            /* Matched a zero length string. Need to avoid matching the same position again */
+            if (pattern[0] == '^') {
+                /* An anchored pattern: set REG_NOTBOL so '^' cannot match at this position again */
+                regexec_flags = REG_NOTBOL;
+            }
+            else {
+                /* A non-anchored search advances by one char */
+                int charlen = utf8_charlen(p[0]);
+                Jim_AppendString(interp, resultObj, p, charlen);
+                p += charlen;
+                n -= charlen;
+            }
         }
-
-    } while (n);
+    }
 
     /*
      * Copy the portion of the string after the last match to the
diff --git a/tests/regexp.test b/tests/regexp.test
index f7c589d..0d86d6b 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -666,6 +666,16 @@ test regexp-21.15 {Replace literal backslash} {
     set value
 } "\\abc\\def"
 
+test regexp-21.16 {Replace nothing} {
+    regsub -all {x*} anything !
+} {!a!n!y!t!h!i!n!g!}
+
+test regexp-21.17 {Replace nothing via empty pattern} {
+    # Interestingly, in this case Tcl does not match at the
+    # end of the string, whereas the previous case (x*) does
+    regsub -all {} anything !
+} {!a!n!y!t!h!i!n!g}
+
 test regexp-22.1 {char range} {
     regexp -all -inline {[a-c]+} "defaaghbcadfbaacccd"
 } {aa bca baaccc}
diff --git a/tests/regexp2.test b/tests/regexp2.test
index 4d915c2..3f357b6 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -810,6 +810,12 @@ test regexpComp-21.10 {regexp command compiling tests} {
 #	list [regsub -all "" "" bar str] $str
 #    }
 #} {0 {}}
+test regexpComp-21.12 {regexp empty pattern with utf8} utf8 {
+    # Make sure the second char isn't sliced up
+    evalInProc {
+	regsub -all "" a\u0442bc !
+    }
+} "!a!\u0442!b!c"
 
 # We can forgive the underlying regexp engine for not supporting this.
 # Why not use this instead? "((^X)*|\$)"