From a942eef12415e0b53e04279259dd92cca4d444f0 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Mon, 4 May 2020 15:02:19 +1000 Subject: regsub: Fix regsub -all with \A regsub -all matches \A against the start of each match, but correctly handle a pattern like ^ which does not advance the match Signed-off-by: Steve Bennett --- jim-regexp.c | 9 ++++++++- jim_tcl.txt | 10 +++++----- tests/regexp.test | 10 +++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/jim-regexp.c b/jim-regexp.c index b0411f8..880bf33 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -540,7 +540,14 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) n--; } - regexec_flags |= REG_NOTBOL; + if (pmatch[0].rm_eo == pmatch[0].rm_so) { + /* The match did not advance the string, so set REG_NOTBOL to force the next match */ + regexec_flags = REG_NOTBOL; + } + else { + regexec_flags = 0; + } + } while (n); /* diff --git a/jim_tcl.txt b/jim_tcl.txt index 159dea9..0ba2e5b 100644 --- a/jim_tcl.txt +++ b/jim_tcl.txt @@ -61,6 +61,7 @@ Changes between 0.78 and 0.79 5. Add support for `file split` 6. Add support for `json::encode` and `json::decode` 7. `aio tty` now allows setting +echo+ without full +raw+ mode +8. `regsub` now fully supports +{backslash}A+ Changes between 0.77 and 0.78 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1088,12 +1089,11 @@ and POSIX are highlighted below. 2. All Tcl character classes are supported (e.g. +[:alnum:]+, +[:digit:]+, +[:space:]+), but... 3. Character classes apply to ASCII characters only 4. Supported shorthand character classes: +{backslash}w+ = +[:alnum:]+, +{backslash}W+ = +^[:alnum:]+, +{backslash}d+ = +[:digit:],+ +{backslash}D+ = +^[:digit:],+ +{backslash}s+ = +[:space:]+, + +{backslash}S+ = +^[:space:]+ -5. Supported constraint escapes: +{backslash}m+ = +{backslash}<+ = start of word, +{backslash}M+ = +{backslash}>+ = end of word +5. Supported constraint escapes: +{backslash}m+ = +{backslash}<+ = start of word, +{backslash}M+ = +{backslash}>+ = end of word, +{backslash}A+ = start of string, +{backslash}Z+ = end of string 6. Backslash escapes may be used within regular expressions, such as +{backslash}n+ = newline, +{backslash}uNNNN+ = unicode -7. Partially supported constraint escapes: +{backslash}A+ = start of string, +{backslash}Z+ = end of string -8. Support for the +?+ non-greedy quantifier. e.g. +*?+ -9. Support for non-capturing parentheses +(?:...)+ -10. Jim Tcl considers that both patterns and strings end at a null character (+\x00+) +7. Support for the +?+ non-greedy quantifier. e.g. +*?+ +8. Support for non-capturing parentheses +(?:...)+ +9. Jim Tcl considers that both patterns and strings end at a null character (+\x00+) STRING MATCHING --------------- diff --git a/tests/regexp.test b/tests/regexp.test index c6133e7..45eddbb 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -489,11 +489,11 @@ test regexp-16.3 {regsub -start} { catch {unset x} list [regsub -all -start 3 {z} hello {/&} x] $x } {0 hello} -#test regexp-16.4 {regsub -start, \A behavior} { -# set out {} -# lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x -# lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x -#} {5 /a/b/c/d/e 3 ab/c/d/e} +test regexp-16.4 {regsub -start, \A behavior} { + set out {} + lappend out [regsub -start 0 -all {\A(\w)} {abcde} {/\1} x] $x + lappend out [regsub -start 2 -all {\A(\w)} {abcde} {/\1} x] $x +} {5 /a/b/c/d/e 3 ab/c/d/e} test regexp-16.5 {regsub -start, double option} { list [regsub -start 2 -start 0 a abc c x] $x } {1 cbc} -- cgit v1.1