aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Bennett <steveb@workware.net.au>2024-01-27 10:39:28 +1000
committerSteve Bennett <steveb@workware.net.au>2024-01-29 10:40:38 +1000
commitadb3252e5b213579ad26f101c25461c78829569c (patch)
treea62995a5b9bdaf51025f1cf8088f7eb8100bed79
parentbe3f8d5371de98934ce8344e4bfafe15ba57ecca (diff)
downloadjimtcl-adb3252e5b213579ad26f101c25461c78829569c.zip
jimtcl-adb3252e5b213579ad26f101c25461c78829569c.tar.gz
jimtcl-adb3252e5b213579ad26f101c25461c78829569c.tar.bz2
regexp: implement regsub -command
Per Tcl 8.7 Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r--jim-regexp.c147
-rw-r--r--jim_tcl.txt23
-rw-r--r--tests/regexp.test43
3 files changed, 165 insertions, 48 deletions
diff --git a/jim-regexp.c b/jim-regexp.c
index 1486c3a..f370e5e 100644
--- a/jim-regexp.c
+++ b/jim-regexp.c
@@ -348,27 +348,30 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
int regcomp_flags = 0;
int regexec_flags = 0;
int opt_all = 0;
+ int opt_command = 0;
int offset = 0;
regex_t *regex;
const char *p;
- int result;
+ int result = JIM_OK;
regmatch_t pmatch[MAX_SUB_MATCHES + 1];
int num_matches = 0;
int i, j, n;
Jim_Obj *varname;
Jim_Obj *resultObj;
+ Jim_Obj *cmd_prefix = NULL;
+ Jim_Obj *regcomp_obj = NULL;
const char *source_str;
int source_len;
- const char *replace_str;
+ const char *replace_str = NULL;
int replace_len;
const char *pattern;
int option;
enum {
- OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_END
+ OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_END
};
static const char * const options[] = {
- "-nocase", "-line", "-all", "-start", "--", NULL
+ "-nocase", "-line", "-all", "-start", "-command", "--", NULL
};
if (argc < 4) {
@@ -412,20 +415,39 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
return JIM_ERR;
}
break;
+
+ case OPT_COMMAND:
+ opt_command = 1;
+ break;
}
}
if (argc - i != 3 && argc - i != 4) {
goto wrongNumArgs;
}
- regex = SetRegexpFromAny(interp, argv[i], regcomp_flags);
+ /* Need to ensure that this is unshared, so just duplicate it always */
+ regcomp_obj = Jim_DuplicateObj(interp, argv[i]);
+ Jim_IncrRefCount(regcomp_obj);
+ regex = SetRegexpFromAny(interp, regcomp_obj, regcomp_flags);
if (!regex) {
+ Jim_DecrRefCount(interp, regcomp_obj);
return JIM_ERR;
}
pattern = Jim_String(argv[i]);
source_str = Jim_GetString(argv[i + 1], &source_len);
- replace_str = Jim_GetString(argv[i + 2], &replace_len);
+ if (opt_command) {
+ cmd_prefix = argv[i + 2];
+ if (Jim_ListLength(interp, cmd_prefix) == 0) {
+ Jim_SetResultString(interp, "command prefix must be a list of at least one element", -1);
+ Jim_DecrRefCount(interp, regcomp_obj);
+ return JIM_ERR;
+ }
+ Jim_IncrRefCount(cmd_prefix);
+ }
+ else {
+ replace_str = Jim_GetString(argv[i + 2], &replace_len);
+ }
varname = argv[i + 3];
/* Create the result string */
@@ -482,44 +504,67 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
*/
Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);
- /*
- * Append the subSpec (replace_str) argument to the variable, making appropriate
- * substitutions. This code is a bit hairy because of the backslash
- * conventions and because the code saves up ranges of characters in
- * subSpec to reduce the number of calls to Jim_SetVar.
- */
-
- for (j = 0; j < replace_len; j++) {
- int idx;
- int c = replace_str[j];
+ if (opt_command) {
+ /* construct the command as a list */
+ Jim_Obj *cmdListObj = Jim_DuplicateObj(interp, cmd_prefix);
+ for (j = 0; j < MAX_SUB_MATCHES; j++) {
+ if (pmatch[j].rm_so == -1) {
+ break;
+ }
+ else {
+ Jim_Obj *srcObj = Jim_NewStringObj(interp, p + pmatch[j].rm_so, pmatch[j].rm_eo - pmatch[j].rm_so);
+ Jim_ListAppendElement(interp, cmdListObj, srcObj);
+ }
+ }
+ Jim_IncrRefCount(cmdListObj);
- if (c == '&') {
- idx = 0;
+ result = Jim_EvalObj(interp, cmdListObj);
+ Jim_DecrRefCount(interp, cmdListObj);
+ if (result != JIM_OK) {
+ goto cmd_error;
}
- else if (c == '\\' && j < replace_len) {
- c = replace_str[++j];
- if ((c >= '0') && (c <= '9')) {
- idx = c - '0';
+ Jim_AppendString(interp, resultObj, Jim_String(Jim_GetResult(interp)), -1);
+ }
+ else {
+ /*
+ * Append the subSpec (replace_str) argument to the variable, making appropriate
+ * substitutions. This code is a bit hairy because of the backslash
+ * conventions and because the code saves up ranges of characters in
+ * subSpec to reduce the number of calls to Jim_SetVar.
+ */
+
+ for (j = 0; j < replace_len; j++) {
+ int idx;
+ int c = replace_str[j];
+
+ if (c == '&') {
+ idx = 0;
}
- else if ((c == '\\') || (c == '&')) {
- Jim_AppendString(interp, resultObj, replace_str + j, 1);
- continue;
+ else if (c == '\\' && j < replace_len) {
+ c = replace_str[++j];
+ if ((c >= '0') && (c <= '9')) {
+ idx = c - '0';
+ }
+ else if ((c == '\\') || (c == '&')) {
+ Jim_AppendString(interp, resultObj, replace_str + j, 1);
+ continue;
+ }
+ else {
+ /* If the replacement is a trailing backslash, just replace with a backslash, otherwise
+ * with the literal backslash and the following character
+ */
+ Jim_AppendString(interp, resultObj, replace_str + j - 1, (j == replace_len) ? 1 : 2);
+ continue;
+ }
}
else {
- /* If the replacement is a trailing backslash, just replace with a backslash, otherwise
- * with the literal backslash and the following character
- */
- Jim_AppendString(interp, resultObj, replace_str + j - 1, (j == replace_len) ? 1 : 2);
+ Jim_AppendString(interp, resultObj, replace_str + j, 1);
continue;
}
- }
- else {
- Jim_AppendString(interp, resultObj, replace_str + j, 1);
- continue;
- }
- if ((idx < MAX_SUB_MATCHES) && pmatch[idx].rm_so != -1 && pmatch[idx].rm_eo != -1) {
- Jim_AppendString(interp, resultObj, p + pmatch[idx].rm_so,
- pmatch[idx].rm_eo - pmatch[idx].rm_so);
+ if ((idx < MAX_SUB_MATCHES) && pmatch[idx].rm_so != -1 && pmatch[idx].rm_eo != -1) {
+ Jim_AppendString(interp, resultObj, p + pmatch[idx].rm_so,
+ pmatch[idx].rm_eo - pmatch[idx].rm_so);
+ }
}
}
@@ -560,22 +605,34 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
*/
Jim_AppendString(interp, resultObj, p, -1);
- /* And now set or return the result variable */
- if (argc - i == 4) {
- result = Jim_SetVariable(interp, varname, resultObj);
+cmd_error:
+ if (result == JIM_OK) {
+ /* And now set or return the result variable */
+ if (argc - i == 4) {
+ result = Jim_SetVariable(interp, varname, resultObj);
- if (result == JIM_OK) {
- Jim_SetResultInt(interp, num_matches);
+ if (result == JIM_OK) {
+ Jim_SetResultInt(interp, num_matches);
+ }
+ else {
+ Jim_FreeObj(interp, resultObj);
+ }
}
else {
- Jim_FreeObj(interp, resultObj);
+ Jim_SetResult(interp, resultObj);
+ result = JIM_OK;
}
}
else {
- Jim_SetResult(interp, resultObj);
- result = JIM_OK;
+ Jim_FreeObj(interp, resultObj);
+ }
+
+ if (opt_command) {
+ Jim_DecrRefCount(interp, cmd_prefix);
}
+ Jim_DecrRefCount(interp, regcomp_obj);
+
return result;
}
diff --git a/jim_tcl.txt b/jim_tcl.txt
index df94e9a..8684ed9 100644
--- a/jim_tcl.txt
+++ b/jim_tcl.txt
@@ -62,6 +62,7 @@ Changes since 0.82
6. `socket` , `open` and `aio accept` now support '-noclose'
7. Add support for hinting with `history hints`
8. Support for `proc` statics by reference (lexical closure) rather than by value
+9. `regsub` now supports '-command' (per Tcl 8.7)
Changes between 0.81 and 0.82
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3840,7 +3841,7 @@ The following switches modify the behaviour of +'regexp'+
regsub
~~~~~~
-+*regsub ?-nocase? ?-all? ?-line? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
++*regsub ?-nocase? ?-all? ?-line? ?-command? ?-start* 'offset'? ?*--*? 'exp string subSpec ?varName?'+
This command matches the regular expression +'exp'+ against
+'string'+ using the rules described in REGULAR EXPRESSIONS
@@ -3897,6 +3898,26 @@ The following switches modify the behaviour of +'regsub'+
function, and the +$+ anchor matches the empty string before any
newline in the string in addition to its normal function.
++*-command*+::
+
+ Changes the handling of +'subSpec'+ so that it is not treated
+ as a template for a substitution string and the substrings +*&*+
+ and +*\n*+ no longer have special meaning. Instead +'subSpec'+ must
+ be a command prefix, that is, a non-empty list. The substring
+ of string that matches +'exp'+, and then each substring that matches
+ each capturing sub-RE within +'exp'+, are appended as additional
+ elements to that list. (The items appended to the list are much
+ like what `regexp -inline` would return). The completed list is
+ then evaluated as a Tcl command, and the result of that command
+ is the substitution string. Any error or exception from command
+ evaluation becomes an error or exception from the regsub command.
+ ::
+ If +*-all*+ is not also given, the command callback will be invoked
+ at most once (exactly when the regular expression matches). If
+ +*-all*+ is given, the command callback will be invoked for each
+ matched location, in sequence. The exact location indices that
+ matched are not made available to the script.
+
+*-start* 'offset'+::
Specifies a character index offset into the string at which to
start matching the regular expression. +'offset'+ will be
diff --git a/tests/regexp.test b/tests/regexp.test
index a7c8a4a..7aeb72e 100644
--- a/tests/regexp.test
+++ b/tests/regexp.test
@@ -365,9 +365,9 @@ test regexp-11.3 {regsub errors} {
test regexp-11.4 {regsub errors} {
list [catch {regsub a b c d e f} msg] $msg
} {1 {wrong # args: should be "regsub ?-switch ...? exp string subSpec ?varName?"}}
-test regexp-11.5 {regsub errors} jim {
+test regexp-11.5 {regsub errors} -constraints jim -body {
list [catch {regsub -gorp a b c} msg] $msg
-} {1 {bad switch "-gorp": must be --, -all, -line, -nocase, or -start}}
+} -result {1 {bad switch "-gorp": must be --, -all, -command, -line, -nocase, or -start}}
test regexp-11.6 {regsub errors} {
catch {regsub -nocase a( b c d} msg
} 1
@@ -742,6 +742,45 @@ test regexp-22.17 {\d in set} {
regexp -all -inline {[a\d]+} "a0ac\[b a\]44c\tb-1aa7"
} {a0a a 44 1aa7}
+test regexp-27.1 {regsub -command} {
+ regsub -command {.x.} {abcxdef} {string length}
+} ab3ef
+test regexp-27.2 {regsub -command} {
+ regsub -command {.x.} {abcxdefxghi} {string length}
+} ab3efxghi
+test regexp-27.3 {regsub -command} {
+ set x 0
+ regsub -all -command {(.)} abcde {apply {args {string cat [incr ::x] [lindex $args 1]}}}
+} 1a2b3c4d5e
+test regexp-27.4 {regsub -command} -body {
+ regsub -command {.x.} {abcxdef} error
+} -returnCodes error -result cxd
+test regexp-27.5 {regsub -command} {
+ regsub -command {(.)(.)} {abcdef} {list ,}
+} {, ab a bcdef}
+test regexp-27.6 {regsub -command} {
+ regsub -command -all {(.)(.)} {abcdef} {list ,}
+} {, ab a b, cd c d, ef e f}
+test regexp-27.7 {regsub -command representation smash} {
+ set ::s {123=456 789}
+ regsub -command -all {\d+} $::s {apply {n {
+ expr {[llength $::s] + $n}
+ }}}
+} {125=458 791}
+test regexp-27.8 {regsub -command representation smash} {
+ set ::t {apply {n {
+ expr {[llength [lindex $::t 1 1 1]] + $n}
+ }}}
+ regsub -command -all {\d+} "123=456 789" $::t
+} {131=464 797}
+test regexp-27.11 {regsub -command error cases} -returnCodes error -body {
+ regsub -command . abc {}
+} -result {command prefix must be a list of at least one element}
+test regexp-27.12 {regsub -command representation smash} {
+ set s {list (.+)}
+ regsub -command $s {list list} $s
+} {(.+) {list list} list}
+
# Tests resulting from bugs reported by users
test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
set str {2:::DebugWin32}