diff options
author | Steve Bennett <steveb@workware.net.au> | 2019-10-27 20:46:56 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2019-10-27 20:46:56 +1000 |
commit | 38c389a29e8ff582ba32c7737bb90f0f0df12078 (patch) | |
tree | 3b23581f8e1465771aa45764a541a531e9f4c54b | |
parent | 80fea1f6e6c5af588faa9159e165198a13983efe (diff) | |
download | jimtcl-38c389a29e8ff582ba32c7737bb90f0f0df12078.zip jimtcl-38c389a29e8ff582ba32c7737bb90f0f0df12078.tar.gz jimtcl-38c389a29e8ff582ba32c7737bb90f0f0df12078.tar.bz2 |
regexp, regsub: -start is a character index
Not a byte index
Reported-by: dbohdan <dbohdan@dbohdan.com>
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | jim-regexp.c | 5 |
-rw-r--r-- | tests/regexp2.test | 7 |
2 files changed, 10 insertions, 2 deletions
```diff
diff --git a/jim-regexp.c b/jim-regexp.c
index 771773a..81f3207 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -55,6 +55,7 @@
 #include <regex.h>
 #endif
 #include "jim.h"
+#include "utf8.h"
 
 static void FreeRegexpInternalRep(Jim_Interp *interp, Jim_Obj *objPtr)
 {
@@ -226,7 +227,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
                 source_str += source_len;
             }
             else if (offset > 0) {
-                source_str += offset;
+                source_str += utf8_index(source_str, offset);
             }
             eflags |= REG_NOTBOL;
         }
@@ -441,6 +442,8 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
             offset = 0;
         }
     }
+    /* Convert from character offset to byte offset */
+    offset = utf8_index(source_str, offset);
 
     /* Copy the part before -start */
     Jim_AppendString(interp, resultObj, source_str, offset);
diff --git a/tests/regexp2.test b/tests/regexp2.test
index f7cf516..76735e6 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -627,7 +627,12 @@ test regexpComp-16.3 {regsub -start} {
 #    lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x
 #    lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x
 #} {5 /a/b/c/d/e 3 ab/c/d/e}
-
+test regexpComp-16.5 {regexp -start with utf8} utf8 {
+    regexp -inline -start 1 . \u0442\u0435\u0441\u0442
+} \u0435
+test regexpComp-16.6 {regexp -start with utf8} utf8 {
+    regsub -start 1 . \u0442\u0435\u0441\u0442 x
+} \u0442x\u0441\u0442
 test regexpComp-17.1 {regexp -inline} {
     regexp -inline b ababa
 } {b}
```