diff options
author | Steve Bennett <steveb@workware.net.au> | 2024-09-21 08:33:42 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2025-03-13 10:26:16 +1000 |
commit | 70846641e4fc5494df1f4d10d93d73a31276b0f6 (patch) | |
tree | d5e18479a400d7d9f634ee525b20b80caf2f588b | |
parent | 6ebd6620fb9807932b81d696c2594c3f5a875990 (diff) | |
download | jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.zip jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.tar.gz jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.tar.bz2 |
regexp, regsub: add support for -expanded
Fixes #311
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | jim-regexp.c | 26 |
-rw-r--r-- | jim_tcl.txt | 13 |
-rw-r--r-- | jimregexp.c | 27 |
-rw-r--r-- | jimregexp.h | 1 |
-rw-r--r-- | tests/regexp.test | 6 |
-rw-r--r-- | tests/regexp2.test | 22 |
6 files changed, 75 insertions, 20 deletions
diff --git a/jim-regexp.c b/jim-regexp.c index 8516c80..28cede4 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -137,10 +137,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) int eflags = 0; int option; enum { - OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END + OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END }; static const char * const options[] = { - "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL + "-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL }; for (i = 1; i < argc; i++) { @@ -185,6 +185,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) return JIM_ERR; } break; + + case OPT_EXPANDED: +#ifdef REG_EXPANDED + regcomp_flags |= REG_EXPANDED; + break; +#else + Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]); + return JIM_ERR; +#endif } } if (argc - i < 2) { @@ -361,10 +370,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) const char *pattern; int option; enum { - OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END + OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END }; static const char * const options[] = { - "-nocase", "-line", "-all", "-start", "-command", "--", NULL + "-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL }; for (i = 1; i < argc; i++) { @@ -405,6 +414,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) case OPT_COMMAND: opt_command = 1; break; + + case OPT_EXPANDED: +#ifdef REG_EXPANDED + regcomp_flags |= REG_EXPANDED; + break; +#else + Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]); + return JIM_ERR; +#endif } } if (argc - i != 3 && argc - i != 4) { diff --git a/jim_tcl.txt b/jim_tcl.txt index 81ced03..275fe13 100644 --- a/jim_tcl.txt +++ b/jim_tcl.txt @@ -59,6 +59,7 @@ Changes since 0.83 #. 
Add `aio translation` support (and fconfigure -translation) #. `exec` TIP 424 - support safer +exec | + syntax (also +open "|| pipeline..."+) (see https://core.tcl-lang.org/tips/doc/trunk/tip/424.md) #. New `lsubst` command to create lists using subst-style substitution +#. Add support for `regexp -expanded` and `regsub -expanded` Changes between 0.82 and 0.83 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -3884,7 +3885,7 @@ See `aio read` regexp ~~~~~~ -+*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+ ++*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?-expanded? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+ Determines whether the regular expression +'exp'+ matches part or all of +'string'+ and returns 1 if it does, 0 if it doesn't. @@ -3957,13 +3958,17 @@ The following switches modify the behaviour of +'regexp'+ data, plus one element for each subexpression in the regular expression. ++*-expanded*+:: + Enables use of the expanded regular expression syntax where whitespace + and comments are ignored. + +*--*+:: Marks the end of switches. The argument following this one will be treated as +'exp'+ even if it starts with a +-+. regsub ~~~~~~ -+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+ ++*regsub ?-nocase? ?-all? ?-line? ?-command? ?-expanded? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+ This command matches the regular expression +'exp'+ against +'string'+ using the rules described in REGULAR EXPRESSIONS @@ -4045,6 +4050,10 @@ The following switches modify the behaviour of +'regsub'+ start matching the regular expression. +'offset'+ will be constrained to the bounds of the input string. ++*-expanded*+:: + Enables use of the expanded regular expression syntax where whitespace + and comments are ignored. + +*--*+:: Marks the end of switches. 
The argument following this one will be treated as +'exp'+ even if it starts with a +-+. diff --git a/jimregexp.c b/jimregexp.c index 136b0c0..38db210 100644 --- a/jimregexp.c +++ b/jimregexp.c @@ -948,6 +948,27 @@ cc_switch: ret = regnode(preg, EXACTLY); + if (preg->cflags & REG_EXPANDED) { + /* Skip leading white space */ + while ((ch = *preg->regparse) != 0) { + if (strchr(" \t\r\n\f\v", ch)) { + preg->regparse++; + continue; + } + break; + } + if (ch == '#') { + /* And skip comments to end of line */ + preg->regparse++; + while ((ch = *preg->regparse) != 0) { + preg->regparse++; + if (ch == '\n') { + break; + } + } + } + } + /* Note that a META operator such as ? or * consumes the * preceding char. * Thus we must be careful to look ahead by 2 and add the @@ -993,6 +1014,12 @@ cc_switch: break; } + /* For REG_EXPANDED, if we hit white space, stop */ + if ((preg->cflags & REG_EXPANDED) && n == 1 && strchr(" \t\r\n\f\v", ch)) { + preg->regparse += n; + break; + } + /* No, so just add this char normally */ regc(preg, ch); added++; diff --git a/jimregexp.h b/jimregexp.h index e18178e..2c81e68 100644 --- a/jimregexp.h +++ b/jimregexp.h @@ -73,6 +73,7 @@ typedef regexp regex_t; #define REG_ICASE 2 #define REG_NOTBOL 16 +#define REG_EXPANDED 32 enum { REG_NOERROR, /* Success. */ diff --git a/tests/regexp.test b/tests/regexp.test index 7aeb72e..c7f949f 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -199,9 +199,9 @@ test regexp-6.1 {regexp errors} { test regexp-6.2 {regexp errors} { list [catch {regexp -nocase a} msg] $msg } {1 {wrong # args: should be "regexp ?-switch ...? exp string ?matchVar? 
?subMatchVar ...?"}} -test regexp-6.3 {regexp errors} jim { +test regexp-6.3 {regexp errors} -constraints jim -body { list [catch {regexp -gorp a} msg] $msg -} {1 {bad switch "-gorp": must be --, -all, -indices, -inline, -line, -nocase, or -start}} +} -result {1 {bad switch "-gorp": must be --, -all, -expanded, -indices, -inline, -line, -nocase, or -start}} test regexp-6.4 {regexp errors} { catch {regexp a( b} msg } 1 @@ -367,7 +367,7 @@ test regexp-11.4 {regsub errors} { } {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}} test regexp-11.5 {regsub errors} -constraints jim -body { list [catch {regsub -gorp a b c} msg] $msg -} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}} +} -result {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}} test regexp-11.6 {regsub errors} { catch {regsub -nocase a( b c d} msg } 1 diff --git a/tests/regexp2.test b/tests/regexp2.test index c3db17d..dfefe41 100644 --- a/tests/regexp2.test +++ b/tests/regexp2.test @@ -463,12 +463,12 @@ test regexpComp-9.6 {-all option to regsub} { } } {1 123xxx} -#test regexpComp-10.1 {expanded syntax in regsub} { -# evalInProc { -# set foo xxx -# list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo -# } -#} {1 defc} +test regexpComp-10.1 {expanded syntax in regsub} { + evalInProc { + set foo xxx + list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo + } +} {1 defc} test regexpComp-10.2 {newline sensitivity in regsub} { evalInProc { set foo xxx @@ -523,11 +523,11 @@ test regexpComp-11.4 {regsub errors} { list [catch {regsub a b c d e f} msg] $msg } } {1 {wrong # args: should be "regsub ?-switch ...? 
exp string subSpec ?varName?"}} -#test regexpComp-11.5 {regsub errors} { -# evalInProc { -# list [catch {regsub -gorp a b c} msg] $msg -# } -#} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}} +test regexpComp-11.5 {regsub errors} { + evalInProc { + list [catch {regsub -gorp a b c} msg] $msg + } +} {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}} test regexpComp-11.6 {regsub errors} { evalInProc { list [catch {regsub -nocase a( b c d} msg] $msg |