aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2024-09-21 08:33:42 +1000
committer Steve Bennett <steveb@workware.net.au> 2025-07-16 09:34:08 +1000
commit 1eff7d23a8259d12495cf6a1480ef3f62c8b347d (patch)
tree 927065e39c7de5b07fb7f630df7a7ba048f41698
parent 84dad5df4943195f798a2dd816832a0757c755c1 (diff)
download jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.zip
jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.tar.gz
jimtcl-1eff7d23a8259d12495cf6a1480ef3f62c8b347d.tar.bz2
regexp, regsub: add support for -expanded

Fixes #311

Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- jim-regexp.c 26
-rw-r--r-- jim_tcl.txt 12
-rw-r--r-- jimregexp.c 43
-rw-r--r-- jimregexp.h 2
-rw-r--r-- tests/regexp.test 6
-rw-r--r-- tests/regexp2.test 26
6 files changed, 94 insertions, 21 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 8516c80..28cede4 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -137,10 +137,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int eflags = 0;
int option;
enum {
- OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_END
+ OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-indices", "-nocase", "-line", "-all", "-inline", "-start", "--", NULL
+ "-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -185,6 +185,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
return JIM_ERR;
}
break;
+
+ case OPT_EXPANDED:
+#ifdef REG_EXPANDED
+ regcomp_flags |= REG_EXPANDED;
+ break;
+#else
+ Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
+ return JIM_ERR;
+#endif
}
}
if (argc - i < 2) {
@@ -361,10 +370,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
const char *pattern;
int option;
enum {
- OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END
+ OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
};
static const char * const options[] = {
- "-nocase", "-line", "-all", "-start", "-command", "--", NULL
+ "-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
};
for (i = 1; i < argc; i++) {
@@ -405,6 +414,15 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
case OPT_COMMAND:
opt_command = 1;
break;
+
+ case OPT_EXPANDED:
+#ifdef REG_EXPANDED
+ regcomp_flags |= REG_EXPANDED;
+ break;
+#else
+ Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
+ return JIM_ERR;
+#endif
}
}
if (argc - i != 3 && argc - i != 4) {
diff --git a/jim_tcl.txt b/jim_tcl.txt
index fd58d50..6975401 100644
--- a/jim_tcl.txt
+++ b/jim_tcl.txt
@@ -3901,7 +3901,7 @@ See `aio read`
regexp
~~~~~~
-+*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+
++*regexp ?-nocase? ?-line? ?-indices? ?-start* 'offset'? *?-all? ?-inline? ?-expanded? ?--?* 'exp string ?matchVar? ?subMatchVar subMatchVar \...?'+
Determines whether the regular expression +'exp'+ matches part or
all of +'string'+ and returns 1 if it does, 0 if it doesn't.
@@ -3974,13 +3974,17 @@ The following switches modify the behaviour of +'regexp'+
data, plus one element for each subexpression in the regular
expression.
++*-expanded*+::
+ Enables use of the expanded regular expression syntax where whitespace
+ and comments are ignored.
+
+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.
regsub
~~~~~~
-+*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
++*regsub ?-nocase? ?-all? ?-line? ?-command? ?-expanded? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
This command matches the regular expression +'exp'+ against
+'string'+ using the rules described in REGULAR EXPRESSIONS
@@ -4062,6 +4066,10 @@ The following switches modify the behaviour of +'regsub'+
start matching the regular expression. +'offset'+ will be
constrained to the bounds of the input string.
++*-expanded*+::
+ Enables use of the expanded regular expression syntax where whitespace
+ and comments are ignored.
+
+*--*+::
Marks the end of switches. The argument following this one will be
treated as +'exp'+ even if it starts with a +-+.
diff --git a/jimregexp.c b/jimregexp.c
index 136b0c0..1fe6d8b 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -1,5 +1,5 @@
/*
- * vi:se ts=8:
+ * vi:se ts=8 sw=8:
*
* regcomp and regexec -- regsub and regerror are elsewhere
*
@@ -216,6 +216,41 @@ static int str_int_len(const int *seq)
return n;
}
+/*
+ * Builds the pattern that is actually compiled when REG_EXPANDED is set.
+ *
+ * Returns a newly allocated copy of 'exp' in which:
+ *  - unescaped white space (space, \t, \r, \n, \f, \v) is removed
+ *  - an unescaped '#' and everything up to (not including) the next
+ *    newline is removed as a comment
+ *  - a backslash and the character that follows it are copied unchanged,
+ *    so "\ " and "\#" stay available as literals
+ *
+ * Returns NULL if the copy could not be allocated.
+ * The caller owns the result and releases it with free().
+ */
+static char *reg_expanded_new_pattern(const char *exp)
+{
+	/* The result is never longer than the original, so a same-size buffer suffices */
+	char *newexp = strdup(exp);
+	char *d = newexp;
+	const char *s = exp;
+	int escape = 0;
+
+	if (newexp == NULL) {
+		return NULL;
+	}
+
+	while (*s) {
+		if (escape) {
+			/* Previous char was a backslash: keep this one verbatim,
+			 * even if it is white space or '#'
+			 */
+			escape = 0;
+		}
+		else if (*s == '\\') {
+			/* Copy the backslash itself and protect the next char */
+			escape = 1;
+		}
+		else if (strchr(" \t\r\n\f\v", *s)) {
+			s++;
+			continue;
+		}
+		else if (*s == '#') {
+			/* skip comments to end of line; the newline is dropped as white space */
+			s++;
+			while (*s && *s != '\n') {
+				s++;
+			}
+			continue;
+		}
+		*d++ = *s++;
+	}
+	*d = '\0';
+	return newexp;
+}
+
/*
- regcomp - compile a regular expression into internal code
*
@@ -246,6 +281,11 @@ int jim_regcomp(regex_t *preg, const char *exp, int cflags)
if (exp == NULL)
FAIL(preg, REG_ERR_NULL_ARGUMENT);
+ if (cflags & REG_EXPANDED) {
+ preg->exp = reg_expanded_new_pattern(exp);
+ exp = preg->exp;
+ }
+
/* First pass: determine size, legality. */
preg->cflags = cflags;
preg->regparse = exp;
@@ -1911,6 +1951,7 @@ size_t jim_regerror(int errcode, const regex_t *preg, char *errbuf, size_t errb
void jim_regfree(regex_t *preg)
{
+ free(preg->exp);
free(preg->program);
}
diff --git a/jimregexp.h b/jimregexp.h
index e18178e..86a94f6 100644
--- a/jimregexp.h
+++ b/jimregexp.h
@@ -49,6 +49,7 @@ typedef struct regexp {
int regmust; /* Internal use only. */
int regmlen; /* Internal use only. */
int *program; /* Allocated */
+ char *exp; /* NULL or allocated version of regcomp expression (for REG_EXPANDED) */
/* working state - compile */
const char *regparse; /* Input-scan pointer. */
@@ -73,6 +74,7 @@ typedef regexp regex_t;
#define REG_ICASE 2
#define REG_NOTBOL 16
+#define REG_EXPANDED 32
enum {
REG_NOERROR, /* Success. */
diff --git a/tests/regexp.test b/tests/regexp.test
index 7aeb72e..c7f949f 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -199,9 +199,9 @@ test regexp-6.1 {regexp errors} {
test regexp-6.2 {regexp errors} {
list [catch {regexp -nocase a} msg] $msg
} {1 {wrong # args: should be "regexp ?-switch ...? exp string ?matchVar? ?subMatchVar ...?"}}
-test regexp-6.3 {regexp errors} jim {
+test regexp-6.3 {regexp errors} -constraints jim -body {
list [catch {regexp -gorp a} msg] $msg
-} {1 {bad switch "-gorp": must be --, -all, -indices, -inline, -line, -nocase, or -start}}
+} -result {1 {bad switch "-gorp": must be --, -all, -expanded, -indices, -inline, -line, -nocase, or -start}}
test regexp-6.4 {regexp errors} {
catch {regexp a( b} msg
} 1
@@ -367,7 +367,7 @@ test regexp-11.4 {regsub errors} {
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
test regexp-11.5 {regsub errors} -constraints jim -body {
list [catch {regsub -gorp a b c} msg] $msg
-} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}}
+} -result {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexp-11.6 {regsub errors} {
catch {regsub -nocase a( b c d} msg
} 1
diff --git a/tests/regexp2.test b/tests/regexp2.test
index c3db17d..ebcf01b 100644
--- a/tests/regexp2.test
+++ b/tests/regexp2.test
@@ -463,12 +463,12 @@ test regexpComp-9.6 {-all option to regsub} {
}
} {1 123xxx}
-#test regexpComp-10.1 {expanded syntax in regsub} {
-# evalInProc {
-# set foo xxx
-# list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
-# }
-#} {1 defc}
+test regexpComp-10.1 {expanded syntax in regsub} {
+ evalInProc {
+ set foo xxx
+ list [regsub -expanded ". \#comment\n . \#comment2" abc def foo] $foo
+ }
+} {1 defc}
test regexpComp-10.2 {newline sensitivity in regsub} {
evalInProc {
set foo xxx
@@ -523,11 +523,11 @@ test regexpComp-11.4 {regsub errors} {
list [catch {regsub a b c d e f} msg] $msg
}
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
-#test regexpComp-11.5 {regsub errors} {
-# evalInProc {
-# list [catch {regsub -gorp a b c} msg] $msg
-# }
-#} {1 {bad switch "-gorp": must be -all, -nocase, -expanded, -line, -linestop, -lineanchor, -start, or --}}
+test regexpComp-11.5 {regsub errors} {
+ evalInProc {
+ list [catch {regsub -gorp a b c} msg] $msg
+ }
+} {1 {bad switch "-gorp": must be --, -all, -command, -expanded, -line, -nocase, or -start}}
test regexpComp-11.6 {regsub errors} {
evalInProc {
list [catch {regsub -nocase a( b c d} msg] $msg
@@ -959,4 +959,8 @@ test regexp-26.3 {regexp operator =~ invalid regexp} -body {
expr {"abc" =~ {[}}
} -returnCodes error -result {couldn't compile regular expression pattern: brackets [] not balanced}
+test regexp-27.1 {regexp expanded} -body {
+ regexp -expanded -all -inline { a ( b b ) + } {abbbbbbcde}
+} -returnCodes ok -result {abbbbbb bb}
+
testreport