aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2024-09-21 08:33:42 +1000
committer Steve Bennett <steveb@workware.net.au> 2025-03-13 10:26:16 +1000
commit 70846641e4fc5494df1f4d10d93d73a31276b0f6 (patch)
tree d5e18479a400d7d9f634ee525b20b80caf2f588b
parent 6ebd6620fb9807932b81d696c2594c3f5a875990 (diff)
download jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.zip
jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.tar.gz
jimtcl-70846641e4fc5494df1f4d10d93d73a31276b0f6.tar.bz2
regexp, regsub: add support for -expanded
Fixes #311
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- jim-regexp.c 26
-rw-r--r-- jim_tcl.txt 13
-rw-r--r-- jimregexp.c 27
-rw-r--r-- jimregexp.h 1
-rw-r--r-- tests/regexp.test 6
-rw-r--r-- tests/regexp2.test 22
6 files changed, 75 insertions, 20 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 8516c80..28cede4 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -137,10 +137,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int eflags = 0;
int option;
enum {
- OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END
+ OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL
+ "-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -185,6 +185,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
return JIM_ERR;
}
break;
+
+ case OPT_EXPANDED:
+#ifdef REG_EXPANDED
+ regcomp_flags |= REG_EXPANDED;
+ break;
+#else
+ Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
+ return JIM_ERR;
+#endif
}
}
if (argc - i < 2) {
@@ -361,10 +370,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
const char *pattern;
int option;
enum {
- OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END
+ OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-nocase", "-line", "-all", "-start", "-command", "--", NULL
+ "-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -405,6 +414,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
case OPT_COMMAND:
opt_command = 1;
break;
+
+ case OPT_EXPANDED:
+#ifdef REG_EXPANDED
+ regcomp_flags |= REG_EXPANDED;
+ break;
+#else
+ Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
+ return JIM_ERR;
+#endif
}
}
if (argc - i != 3 && argc - i != 4) {
diff --git a/jim_tcl.txt b/jim_tcl.txt
index 81ced03..275fe13 100644
--- a/jim_tcl.txt
+++ b/jim_tcl.txt
@@ -59,6 +59,7 @@ Changes since 0.83
#. Add `aio translation` support (and fconfigure -translation)
#. `exec` TIP 424 - support safer +exec | + syntax (also +open "|| pipeline..."+) (see https://core.tcl-lang.org/tips/doc/trunk/tip/424.md)
#. New `lsubst` command to create lists using subst-style substitution
+#. Add support for `regexp -expanded` and `regsub -expanded`
Changes between 0.82 and 0.83
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3884,7 +3885,7 @@ See `aio read`
regexp
~~~~~~
-+*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+
++*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?-expanded? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+
Determines whether the regular expression +'exp'+ matches part or
all of +'string'+ and returns 1 if it does, 0 if it doesn't.
@@ -3957,13 +3958,17 @@ The following switches modify the behaviour of +'regexp'+
data, plus one element for each subexpression in the regular
expression.
++*-expanded*+::
+ Enables the expanded regular expression syntax, in which whitespace in
+ +'exp'+ is ignored, as are comments running from +#+ to the end of the line.
+
+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.
regsub
~~~~~~
-+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
++*regsub ?-nocase? ?-all? ?-line? ?-command? ?-expanded? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
This command matches the regular expression +'exp'+ against
+'string'+ using the rules described in REGULAR EXPRESSIONS
@@ -4045,6 +4050,10 @@ The following switches modify the behaviour of +'regsub'+
start matching the regular expression. +'offset'+ will be
constrained to the bounds of the input string.
++*-expanded*+::
+ Enables the expanded regular expression syntax, in which whitespace in
+ +'exp'+ is ignored, as are comments running from +#+ to the end of the line.
+
+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.
diff --git a/jimregexp.c b/jimregexp.c
index 136b0c0..38db210 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -948,6 +948,27 @@ cc_switch:
ret = regnode(preg, EXACTLY);
+ if (preg->cflags & REG_EXPANDED) {
+ /* Skip leading white space */
+ while ((ch = *preg->regparse) != 0) {
+ if (strchr(" \t\r\n\f\v", ch)) {
+ preg->regparse++;
+ continue;
+ }
+ break;
+ }
+ if (ch == '#') {
+ /* And skip comments to end of line */
+ preg->regparse++;
+ while ((ch = *preg->regparse) != 0) {
+ preg->regparse++;
+ if (ch == '\n') {
+ break;
+ }
+ }
+ }
+ }
+
/* Note that a META operator such as ? or * consumes the
* preceding char.
* Thus we must be careful to look ahead by 2 and add the
@@ -993,6 +1014,12 @@ cc_switch:
break;
}
+ /* For REG_EXPANDED, if we hit white space, stop */
+ if ((preg->cflags & REG_EXPANDED) && n == 1 && strchr(" \t\r\n\f\v", ch)) {
+ preg->regparse += n;
+ break;
+ }
+
/* No, so just add this char normally */
regc(preg, ch);
added++;
diff --git a/jimregexp.h b/jimregexp.h
index e18178e..2c81e68 100644
--- a/jimregexp.h
+++ b/jimregexp.h
@@ -73,6 +73,7 @@ typedef regexp regex_t;
#define REG_ICASE 2
#define REG_NOTBOL 16
+#define REG_EXPANDED 32
enum {
REG_NOERROR, /* Success. */
diff --git a/tests/regexp.test b/tests/regexp.test
index 7aeb72e..c7f949f 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -199,9 +199,9 @@ test regexp-6.1 {regexp errors} {
test regexp-6.2 {regexp errors} {
list [catch {regexp -nocase a} msg] $msg
} {1 {wrong # args: should be "regexp ?-switch ...? exp string ?matchVar? ?subMatchVar ...?"}}
-test regexp-6.3 {regexp errors} jim {
+test regexp-6.3 {regexp errors} -constraints jim -body {
list [catch {regexp -gorp a} msg] $msg
-} {1 {bad switch "-gorp": must be --, -all, -indices, -inline, -line, -nocase, or -start}}
+} -result {1 {bad switch "-gorp": must be --, -all, -expanded, -indices, -inline, -line, -nocase, or -start}}
test regexp-6.4 {regexp errors} {
catch {regexp a( b} msg
} 1
@@ -367,7 +367,7 @@ test regexp-11.4 {regsub errors} {
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
test regexp-11.5 {regsub errors} -constraints jim -body {
list [catch {regsub -gorp a b c} msg] $msg
-} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}}
+} -result {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexp-11.6 {regsub errors} {
catch {regsub -nocase a( b c d} msg
} 1
diff --git a/tests/regexp2.test b/tests/regexp2.test
index c3db17d..dfefe41 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -463,12 +463,12 @@ test regexpComp-9.6 {-all option to regsub} {
}
} {1 123xxx}
-#test regexpComp-10.1 {expanded syntax in regsub} {
-# evalInProc {
-# set foo xxx
-# list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
-# }
-#} {1 defc}
+test regexpComp-10.1 {expanded syntax in regsub} {
+ evalInProc {
+ set foo xxx
+ list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
+ }
+} {1 defc}
test regexpComp-10.2 {newline sensitivity in regsub} {
evalInProc {
set foo xxx
@@ -523,11 +523,11 @@ test regexpComp-11.4 {regsub errors} {
list [catch {regsub a b c d e f} msg] $msg
}
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
-#test regexpComp-11.5 {regsub errors} {
-# evalInProc {
-# list [catch {regsub -gorp a b c} msg] $msg
-# }
-#} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+test regexpComp-11.5 {regsub errors} {
+ evalInProc {
+ list [catch {regsub -gorp a b c} msg] $msg
+ }
+} {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexpComp-11.6 {regsub errors} {
evalInProc {
list [catch {regsub -nocase a( b c d} msg] $msg