add lsubst command

Halfway between list and subst, makes it easy to construct lists with substitutions (as opposed to strings with substitutions) Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2024-07-24 13:28:35 +1000
committer: Steve Bennett <steveb@workware.net.au> 2025-03-13 10:26:16 +1000
commit: cb2d6849783da13954cd74baafeb9afa5e4cf4c0 (patch)
tree: f05dacb1ea99e52fdf1497b244ea94e4f5fa80a0
parent: fba21805f6e6d8561e9490865089f842436a8758 (diff)
download: jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.zip
jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.tar.gz
jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.tar.bz2
3 files changed, 329 insertions, 2 deletions
diff --git a/jim.c b/jim.c
index e1a67ca..2532d72 100644
--- a/jim.c
+++ b/jim.c
@@ -11125,6 +11125,7 @@ static Jim_Obj *JimInterpolateTokens(Jim_Interp *interp, const ScriptToken * tok
     Jim_Obj *objPtr;
     char *s;
     int taint = 0;
+    const char *error_action = NULL;
 
     if (tokens <= JIM_EVAL_SINTV_LEN)
         intv = sintv;
@@ -11144,14 +11145,16 @@ static Jim_Obj *JimInterpolateTokens(Jim_Interp *interp, const ScriptToken * tok
                     tokens = i;
                     continue;
                 }
-                /* XXX: Should probably set an error about break outside loop */
+                error_action = "break";
                 /* fall through to error */
             case JIM_CONTINUE:
                 if (flags & JIM_SUBST_FLAG) {
                     intv[i] = NULL;
                     continue;
                 }
-                /* XXX: Ditto continue outside loop */
+                if (!error_action) {
+                    error_action = "continue";
+                }
                 /* fall through to error */
             default:
                 while (i--) {
@@ -11160,6 +11163,9 @@ static Jim_Obj *JimInterpolateTokens(Jim_Interp *interp, const ScriptToken * tok
                 if (intv != sintv) {
                     Jim_Free(intv);
                 }
+                if (error_action) {
+                    Jim_SetResultFormatted(interp, "invoked \"%s\" outside of a loop", error_action);
+                }
                 return NULL;
         }
         taint |= intv[i]->taint;
@@ -11214,6 +11220,117 @@ static Jim_Obj *JimInterpolateTokens(Jim_Interp *interp, const ScriptToken * tok
     return objPtr;
 }
 
+#define JIM_LSUBST_LINE 0x0001
+
+/* Parse a string as an 'lsubst' argument and sets the interp result.
+ * Return JIM_OK if ok, or JIM_ERR on error.
+ *
+ * Modelled on Jim_EvalObj()
+ *
+ * If flags contains JIM_LSUBST_LINE, each "statement" is returned as list of {command arg...}
+ */
+static int JimListSubstObj(Jim_Interp *interp, struct Jim_Obj *objPtr, unsigned flags)
+{
+    int i;
+    ScriptObj *script;
+    ScriptToken *token;
+    Jim_Obj *resultListObj;
+    int retcode = JIM_OK;
+
+    Jim_IncrRefCount(objPtr);     /* Make sure it's shared. */
+    script = JimGetScript(interp, objPtr);
+    if (JimParseCheckMissing(interp, script->missing) == JIM_ERR) {
+        JimSetErrorStack(interp, script);
+        Jim_DecrRefCount(interp, objPtr);
+        return JIM_ERR;
+    }
+
+    token = script->token;
+
+    script->inUse++;
+
+    /* Build the result list here */
+    resultListObj = Jim_NewListObj(interp, NULL, 0);
+
+    /* Add every command, arg to the result list */
+    for (i = 0; i < script->len && retcode == JIM_OK; ) {
+        int argc;
+        int j;
+        Jim_Obj *lineListObj = resultListObj;
+
+        /* First token of the line is always JIM_TT_LINE */
+        argc = token[i].objPtr->internalRep.scriptLineValue.argc;
+        script->linenr = token[i].objPtr->internalRep.scriptLineValue.line;
+
+        /* Skip the JIM_TT_LINE token */
+        i++;
+
+        if (flags & JIM_LSUBST_LINE) {
+            lineListObj = Jim_NewListObj(interp, NULL, 0);
+        }
+
+        /* Extract the words from this line */
+        for (j = 0; j < argc; j++) {
+            long wordtokens = 1;
+            int expand = 0;
+            Jim_Obj *wordObjPtr = NULL;
+
+            if (token[i].type == JIM_TT_WORD) {
+                wordtokens = JimWideValue(token[i++].objPtr);
+                if (wordtokens < 0) {
+                    expand = 1;
+                    wordtokens = -wordtokens;
+                }
+            }
+
+            /* Note we don't worry about a fast path here */
+            wordObjPtr = JimInterpolateTokens(interp, token + i, wordtokens, JIM_NONE);
+
+            if (!wordObjPtr) {
+                if (retcode == JIM_OK) {
+                    retcode = JIM_ERR;
+                }
+                break;
+            }
+
+            Jim_IncrRefCount(wordObjPtr);
+            i += wordtokens;
+
+            if (!expand) {
+                Jim_ListAppendElement(interp, lineListObj, wordObjPtr);
+            }
+            else {
+                int k;
+                /* Need to add each word of wordObjPtr list to the result list */
+                for (k = 0; k < Jim_ListLength(interp, wordObjPtr); k++) {
+                    Jim_ListAppendElement(interp, lineListObj, Jim_ListGetIndex(interp, wordObjPtr, k));
+                }
+            }
+            Jim_DecrRefCount(interp, wordObjPtr);
+        }
+
+        if (flags & JIM_LSUBST_LINE) {
+            Jim_ListAppendElement(interp, resultListObj, lineListObj);
+        }
+    }
+
+    /* Note that we don't have to decrement inUse, because the
+     * following code transfers our use of the reference again to
+     * the script object. */
+    Jim_FreeIntRep(interp, objPtr);
+    objPtr->typePtr = &scriptObjType;
+    Jim_SetIntRepPtr(objPtr, script);
+    Jim_DecrRefCount(interp, objPtr);
+
+    if (retcode == JIM_OK) {
+        Jim_SetResult(interp, resultListObj);
+    }
+    else {
+        Jim_FreeNewObj(interp, resultListObj);
+    }
+
+    return retcode;
+}
 
 /* listPtr *must* be a list.
  * The contents of the list is evaluated with the first element as the command and
@@ -15636,6 +15753,18 @@ static int Jim_SubstCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *ar
     return JIM_OK;
 }
 
+/* [lsubst] */
+static int Jim_LsubstCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
+{
+    if (argc == 3) {
+        if (Jim_CompareStringImmediate(interp, argv[1], "-line")) {
+            return JimListSubstObj(interp, argv[2], JIM_LSUBST_LINE);
+        }
+        return JIM_USAGE;
+    }
+    return JimListSubstObj(interp, argv[1], 0);
+}
+
 #ifdef jim_ext_namespace
 static int JimIsGlobalNamespace(Jim_Obj *objPtr)
 {
@@ -16525,6 +16654,7 @@ static const struct jim_core_cmd_def_t {
     {"lsearch", Jim_LsearchCoreCommand, 2, -1, "?-exact|-glob|-regexp|-command 'command'? ?-bool|-inline? ?-not? ?-nocase? ?-all? ?-stride len? ?-index val? list value" },
     {"lset", Jim_LsetCoreCommand, 2, -1, "listVar ?index ...? value" },
     {"lsort", Jim_LsortCoreCommand, 1, -1, "?options? list" },
+    {"lsubst", Jim_LsubstCoreCommand, 1, 2, "?-line? string" },
     {"proc", Jim_ProcCoreCommand, 3, 4, "name arglist ?statics? body" },
     {"puts", Jim_PutsCoreCommand, 1, 2, "?-nonewline? string" },
     {"rand", Jim_RandCoreCommand, 0, 2, "?min? ?max?" },
diff --git a/jim_tcl.txt b/jim_tcl.txt
index 3ae8ae1..81ced03 100644
--- a/jim_tcl.txt
+++ b/jim_tcl.txt
@@ -58,6 +58,7 @@ Changes since 0.83
 #. Add support for `package forget`
 #. Add `aio translation` support (and fconfigure -translation)
 #. `exec` TIP 424 - support safer +exec | + syntax (also +open "|| pipeline..."+) (see https://core.tcl-lang.org/tips/doc/trunk/tip/424.md)
+#. New `lsubst` command to create lists using subst-style substitution
 
 Changes between 0.82 and 0.83
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -3148,6 +3149,63 @@ than variables, a list of unassigned elements is returned.
     a=1,b=2
 ----
 
+lsubst
+~~~~
++*lsubst ?-line?* 'string'+
+
+This command is similar to `list` in that it creates a list, but uses
+the same rules as scripts when constructing the elements of the list.
+It is somewhat similar to `subst` except it produces a list instead of a string.
+
+This means that variables are substituted, commands are evaluated, backslashes are
+interpreted, the expansion operator is applied and comments are skipped.
+
+Consider the following example.
+
+---
+    set x 1
+    set y {2 3}
+    set z 3
+    lsubst {
+        # This is a list with interpolation
+        $x; # The x variable
+        {*}$y; # The y variable expanded
+        [string cat a b c]; # A command
+        {*}[list 4 5]; # A list expanded into multiple elements
+        "$z$z"; # A string with interpolation
+    }
+---
+
+The result of `lsubst` is the following list with 7 elements.
+
+---
+    1 2 3 abc 4 5 33
+---
+
+This is particularly useful when constructing a list (or dict)
+as a data structure as it easily allows for comments and variable and command
+substitution.
+
+Sometimes it is useful to return each "command" as a separate list rather than
+simply running all the words together. This can be accomplished with `lsubst -line`.
+
+Consider the following example.
+
+---
+    lsubst -line {
+        # two "lines" because of the semicolon
+        one a; two b
+        # one line with three elements
+        {*}{a b c}
+    }
+---
+
+The result of `lsubst -line` is the following list with 3 elements, one for each "command".
+
+---
+{one a} {two b} {a b c}
+---
+
 local
 ~~~~~
 +*local* 'cmd ?arg\...?'+
diff --git a/tests/lsubst.test b/tests/lsubst.test
new file mode 100644
index 0000000..1c2c082
--- /dev/null
+++ b/tests/lsubst.test
@@ -0,0 +1,139 @@
+source [file dirname [info script]]/testing.tcl
+
+needs cmd lsubst
+
+test lsubst-1.1 {no args} -body {
+    lsubst
+} -returnCodes error -result {wrong # args: should be "lsubst ?-line? string"}
+
+test lsubst-1.2 {too many args} -body {
+    lsubst a b c
+} -returnCodes error -result {wrong # args: should be "lsubst ?-line? string"}
+
+test lsubst-1.3 {basic, no subst} -body {
+    lsubst {a b c}
+} -result {a b c}
+
+test lsubst-1.4 {basics, vars} -body {
+    set a 1
+    set b "2 3"
+    set c "4 5 6"
+    set d ".1"
+    lsubst {$a $b $c$d}
+} -result {1 {2 3} {4 5 6.1}}
+
+test lsubst-1.5 {comments} -body {
+    # It is helpful to be able to include comments in a list definition
+    # just like in a script
+    lsubst {
+        # comment line
+        1
+        2 3
+        # comment line with continuation \
+        this is also a comments
+        4 ;# comment at end of line
+        5
+    }
+} -result {1 2 3 4 5}
+
+test lsubst-1.6 {commands} -body {
+    set a 0
+    lsubst {
+        [incr a]
+        [incr a]
+        [list d e]
+        [string cat f g][string cat h i]
+    }
+} -result {1 2 {d e} fghi}
+
+test lsubst-1.7 {expand} -body {
+    set a {1 2}
+    set space " "
+    set b {3 4 5}
+    lsubst {
+        {*}$a
+        {*}$a$space$b$space[list 6 7]
+    }
+} -result {1 2 1 2 3 4 5 6 7}
+
+test lsubst-1.8 {empty case} -body {
+    lsubst {
+        # Nothing
+    }
+} -result {}
+
+test lsubst-1.9 {backslash escapes} -body {
+    lsubst {
+        # char escapes
+        \r\n\t
+        # unicode escapes
+        \u00b5
+        # hex escapes
+        \x41\x42
+    }
+} -result [list \r\n\t \u00b5 AB]
+
+test lsubst-1.10 {simple -line} -body {
+    set a {1 2}
+    set b {3 4 5}
+    lsubst -line {
+        # This line won't produce a list, but the next will produce a list with two elements
+        {*}$a
+        # And this one will have three elements
+        one two $b
+    }
+} -result {{1 2} {one two {3 4 5}}}
+
+test lsubst-2.1 {error, missing [} -body {
+    lsubst {
+        # Missing bracket
+        [string cat
+    }
+} -returnCodes error -result {unmatched "["}
+
+test lsubst-2.2 {error, invalid command} -body {
+    lsubst {
+        a
+        [dummy]
+        b
+    }
+} -returnCodes error -result {invalid command name "dummy"}
+
+test lsubst-2.3 {error, unset variable} -body {
+    lsubst {
+        a
+        $doesnotexist
+        b
+    }
+} -returnCodes error -result {can't read "doesnotexist": no such variable}
+
+test lsubst-2.4 {break} -body {
+    lsubst {
+        a
+        [break]
+        b
+    }
+} -returnCodes error -result {invoked "break" outside of a loop}
+
+test lsubst-2.5 {continue} -body {
+    lsubst {
+        a
+        [continue]
+        b
+    }
+} -returnCodes error -result {invoked "continue" outside of a loop}
+
+test lsubst-3.1 {preservation of line numbers} -body {
+    set x abc
+    set src1 [info source $x]
+    set list [lsubst {
+        a
+        $x
+        b
+    }]
+    if {[info source [lindex $list 1]] ne [info source $x]} {
+        error "source does not match
+    }
+} -result {}
+
+testreport
author	Steve Bennett <steveb@workware.net.au>	2024-07-24 13:28:35 +1000
committer	Steve Bennett <steveb@workware.net.au>	2025-03-13 10:26:16 +1000
commit	cb2d6849783da13954cd74baafeb9afa5e4cf4c0 (patch)
tree	f05dacb1ea99e52fdf1497b244ea94e4f5fa80a0
parent	fba21805f6e6d8561e9490865089f842436a8758 (diff)
download	jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.zip jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.tar.gz jimtcl-cb2d6849783da13954cd74baafeb9afa5e4cf4c0.tar.bz2