regexp -indices should return character indices

Not byte indices.

Reported-by: dbohdan <dbohdan@dbohdan.com>
Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2019-10-27 21:17:42 +1000
committer: Steve Bennett <steveb@workware.net.au> 2019-10-27 21:17:42 +1000
commit: 9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5 (patch)
tree: a439f53d186ac7431935b91c4b52a8f7d38eb7dc /jim-regexp.c
parent: 38c389a29e8ff582ba32c7737bb90f0f0df12078 (diff)
download: jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.zip
jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.tar.gz
jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.tar.bz2
1 files changed, 7 insertions, 8 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 81f3207..3134598 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -281,16 +281,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
             }
         }
         else {
-            int len = pmatch[j].rm_eo - pmatch[j].rm_so;
-
             if (opt_indices) {
-                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
-                        offset + pmatch[j].rm_so));
-                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp,
-                        offset + pmatch[j].rm_so + len - 1));
+                /* rm_so and rm_eo are byte offsets. We need char offsets */
+                int so = utf8_strlen(source_str, pmatch[j].rm_so);
+                int eo = utf8_strlen(source_str + pmatch[j].rm_so, pmatch[j].rm_eo);
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + so));
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + eo - 1));
             }
             else {
-                Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
+                Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, pmatch[j].rm_eo - pmatch[j].rm_so);
             }
         }
 
@@ -311,7 +310,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
   try_next_match:
     if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
         if (pmatch[0].rm_eo) {
-            offset += pmatch[0].rm_eo;
+            offset += utf8_strlen(source_str, pmatch[0].rm_eo);
             source_str += pmatch[0].rm_eo;
         }
         else {
author	Steve Bennett <steveb@workware.net.au>	2019-10-27 21:17:42 +1000
committer	Steve Bennett <steveb@workware.net.au>	2019-10-27 21:17:42 +1000
commit	9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5 (patch)
tree	a439f53d186ac7431935b91c4b52a8f7d38eb7dc /jim-regexp.c
parent	38c389a29e8ff582ba32c7737bb90f0f0df12078 (diff)
download	jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.zip jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.tar.gz jimtcl-9eba9e87fc3ed257bf82c1bc7e2f77dba1aaf9a5.tar.bz2