diff options
author | Steve Bennett <steveb@workware.net.au> | 2024-09-21 08:33:42 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2025-07-16 09:34:08 +1000 |
commit | 1eff7d23a8259d12495cf6a1480ef3f62c8b347d (patch) | |
tree | 927065e39c7de5b07fb7f630df7a7ba048f41698 | |
parent | 84dad5df4943195f798a2dd816832a0757c755c1 (diff) | |
download | jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.zip jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.tar.gz jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.tar.bz2 |
regexp, regsub: add support for -expanded
Fixes #311
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | jim-regexp.c | 26 | ||||
-rw-r--r-- | jim_tcl.txt | 12 | ||||
-rw-r--r-- | jimregexp.c | 43 | ||||
-rw-r--r-- | jimregexp.h | 2 | ||||
-rw-r--r-- | tests/regexp.test | 6 | ||||
-rw-r--r-- | tests/regexp2.test | 26 |
6 files changed, 94 insertions, 21 deletions
diff --git a/jim-regexp.c b/jim-regexp.c index 8516c80..28cede4 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -137,10 +137,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) int eflags = 0; int option; enum { - OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END + OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END }; static const char * const options[] = { - "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL + "-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL }; for (i = 1; i < argc; i++) { @@ -185,6 +185,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) return JIM_ERR; } break; + + case OPT_EXPANDED: +#ifdef REG_EXPANDED + regcomp_flags |= REG_EXPANDED; + break; +#else + Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]); + return JIM_ERR; +#endif } } if (argc - i < 2) { @@ -361,10 +370,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) const char *pattern; int option; enum { - OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END + OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END }; static const char * const options[] = { - "-nocase", "-line", "-all", "-start", "-command", "--", NULL + "-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL }; for (i = 1; i < argc; i++) { @@ -405,6 +414,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) case OPT_COMMAND: opt_command = 1; break; + + case OPT_EXPANDED: +#ifdef REG_EXPANDED + regcomp_flags |= REG_EXPANDED; + break; +#else + Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]); + return JIM_ERR; +#endif } } if (argc - i != 3 && argc - i != 4) { diff --git a/jim_tcl.txt b/jim_tcl.txt index fd58d50..6975401 100644 --- a/jim_tcl.txt +++ b/jim_tcl.txt @@ -3901,7 +3901,7 @@ See `aio read` regexp ~~~~~~ -+*regexp ?-nocase? ?-line? 
?-indices? ?-start* 'offset'? *?-all? ?-inline? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+ ++*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?-expanded? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+ Determines whether the regular expression +'exp'+ matches part or all of +'string'+ and returns 1 if it does, 0 if it doesn't. @@ -3974,13 +3974,17 @@ The following switches modify the behaviour of +'regexp'+ data, plus one element for each subexpression in the regular expression. ++*-expanded*+:: + Enables use of the expanded regular expression syntax where whitespace + and comments are ignored. + +*--*+:: Marks the end of switches. The argument following this one will be treated as +'exp'+ even if it starts with a +-+. regsub ~~~~~~ -+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+ ++*regsub ?-nocase? ?-all? ?-line? ?-command? ?-expanded? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+ This command matches the regular expression +'exp'+ against +'string'+ using the rules described in REGULAR EXPRESSIONS @@ -4062,6 +4066,10 @@ The following switches modify the behaviour of +'regsub'+ start matching the regular expression. +'offset'+ will be constrained to the bounds of the input string. ++*-expanded*+:: + Enables use of the expanded regular expression syntax where whitespace + and comments are ignored. + +*--*+:: Marks the end of switches. The argument following this one will be treated as +'exp'+ even if it starts with a +-+. 
diff --git a/jimregexp.c b/jimregexp.c index 136b0c0..1fe6d8b 100644 --- a/jimregexp.c +++ b/jimregexp.c @@ -1,5 +1,5 @@ /* - * vi:se ts=8: + * vi:se ts=8 sw=8: * * regcomp and regexec -- regsub and regerror are elsewhere * @@ -216,6 +216,41 @@ static int str_int_len(const int *seq) return n; } +/* Returns a newly allocated copy of exp with unescaped white space and '#' comments (to end of line) removed, for REG_EXPANDED; jim_regcomp() stores it in preg->exp and jim_regfree() frees it */ +static char *reg_expanded_new_pattern(const char *exp) +{ + /* Make a copy and do removal in place as the final will always be no longer than the original */ + char *newexp = strdup(exp); + char *d = newexp; + const char *s = exp; + int escape = 0; + + while (*s) { + if (escape) { + escape = 0; + /* fall through: the escaped character is copied verbatim, so "\ " and "\#" are retained */ + } + else if (*s == '\\') { + escape = 1; + } + else if (strchr(" \t\r\n\f\v", *s)) { + s++; + continue; + } + else if (*s == '#') { + /* skip comments to end of line */ + s++; + while (*s && *s != '\n') { + s++; + } + continue; + } + *d++ = *s++; + } + *d++ = '\0'; + return newexp; +} + /* - regcomp - compile a regular expression into internal code * @@ -246,6 +281,11 @@ int jim_regcomp(regex_t *preg, const char *exp, int cflags) if (exp == NULL) FAIL(preg, REG_ERR_NULL_ARGUMENT); + if (cflags & REG_EXPANDED) { + preg->exp = reg_expanded_new_pattern(exp); + exp = preg->exp; + } + /* First pass: determine size, legality. */ preg->cflags = cflags; preg->regparse = exp; @@ -1911,6 +1951,7 @@ size_t jim_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errb void jim_regfree(regex_t *preg) { + free(preg->exp); free(preg->program); } diff --git a/jimregexp.h b/jimregexp.h index e18178e..86a94f6 100644 --- a/jimregexp.h +++ b/jimregexp.h @@ -49,6 +49,7 @@ typedef struct regexp { int regmust; /* Internal use only. */ int regmlen; /* Internal use only. */ int *program; /* Allocated */ + char *exp; /* NULL or allocated version of regcomp expression (for REG_EXPANDED) */ /* working state - compile */ const char *regparse; /* Input-scan pointer. 
*/ @@ -73,6 +74,7 @@ typedef regexp regex_t; #define REG_ICASE 2 #define REG_NOTBOL 16 +#define REG_EXPANDED 32 enum { REG_NOERROR, /* Success. */ diff --git a/tests/regexp.test b/tests/regexp.test index 7aeb72e..c7f949f 100644 --- a/tests/regexp.test +++ b/tests/regexp.test @@ -199,9 +199,9 @@ test regexp-6.1 {regexp errors} { test regexp-6.2 {regexp errors} { list [catch {regexp -nocase a} msg] $msg } {1 {wrong # args: should be "regexp ?-switch ...? exp string ?matchVar? ?subMatchVar ...?"}} -test regexp-6.3 {regexp errors} jim { +test regexp-6.3 {regexp errors} -constraints jim -body { list [catch {regexp -gorp a} msg] $msg -} {1 {bad switch "-gorp": must be --, -all, -indices, -inline, -line, -nocase, or -start}} +} -result {1 {bad switch "-gorp": must be --, -all, -expanded, -indices, -inline, -line, -nocase, or -start}} test regexp-6.4 {regexp errors} { catch {regexp a( b} msg } 1 @@ -367,7 +367,7 @@ test regexp-11.4 {regsub errors} { } {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}} test regexp-11.5 {regsub errors} -constraints jim -body { list [catch {regsub -gorp a b c} msg] $msg -} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}} +} -result {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}} test regexp-11.6 {regsub errors} { catch {regsub -nocase a( b c d} msg } 1 diff --git a/tests/regexp2.test b/tests/regexp2.test index c3db17d..ebcf01b 100644 --- a/tests/regexp2.test +++ b/tests/regexp2.test @@ -463,12 +463,12 @@ test regexpComp-9.6 {-all option to regsub} { } } {1 123xxx} -#test regexpComp-10.1 {expanded syntax in regsub} { -# evalInProc { -# set foo xxx -# list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo -# } -#} {1 defc} +test regexpComp-10.1 {expanded syntax in regsub} { + evalInProc { + set foo xxx + list [regsub -expanded ". \#comment\n . 
\#comment2" abc def foo] $foo + } +} {1 defc} test regexpComp-10.2 {newline sensitivity in regsub} { evalInProc { set foo xxx @@ -523,11 +523,11 @@ test regexpComp-11.4 {regsub errors} { list [catch {regsub a b c d e f} msg] $msg } } {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}} -#test regexpComp-11.5 {regsub errors} { -# evalInProc { -# list [catch {regsub -gorp a b c} msg] $msg -# } -#} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}} +test regexpComp-11.5 {regsub errors} { + evalInProc { + list [catch {regsub -gorp a b c} msg] $msg + } +} {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}} test regexpComp-11.6 {regsub errors} { evalInProc { list [catch {regsub -nocase a( b c d} msg] $msg @@ -959,4 +959,8 @@ test regexp-26.3 {regexp operator =~ invalid regexp} -body { expr {"abc" =~ {[}} } -returnCodes error -result {couldn't compile regular expression pattern: brackets [] not balanced} +test regexp-27.1 {regexp expanded} -body { + regexp -expanded -all -inline { a ( b b ) + } {abbbbbbcde} +} -returnCodes ok -result {abbbbbb bb} + testreport |