2008-11-05 Steve Bennett <steveb@workware.net.au>

* jim-regexp.c: Add regexp extension for regexp and regsub commands
author: oharboe <oharboe> 2008-11-06 13:31:22 +0000
committer: oharboe <oharboe> 2008-11-06 13:31:22 +0000
commit: fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca (patch)
tree: 67b78310c9c0cc52b05d12710830c4016c09db9f
parent: 5ccb870e18de05cbb90a0c1b90bd647f3b30b94c (diff)
download: jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.zip
jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.tar.gz
jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.tar.bz2
3 files changed, 1069 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 0d667c6..99aeb91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,16 @@
+2008-11-05 Steve Bennett <steveb@workware.net.au>
+
+	* jim-regexp.c: Add regexp extension for regexp and regsub commands
+
 2008-11-04 Uwe Klein <uklein@klein-messgeraete.de>
+ 
+     * jim-hwio.c: file added
+     * provides memory area and hardware access on linux i386
+
+2008-11-05 Steve Bennett <steveb@workware.net.au>
 
-        * jim-hwio.c: file added
-        * provides memory area and hardware access on linux i386
+	* jim.c: add support for proc default arguments to jim
+	* jim.c: fix lazy expression evaluation with unary not
 
 2008-11-04 Steve Bennett <steveb@workware.net.au>
 
diff --git a/jim-regexp.c b/jim-regexp.c
new file mode 100644
index 0000000..72d2f1f
--- /dev/null
+++ b/jim-regexp.c
@@ -0,0 +1,482 @@
+/* 
+ * (c) 2008 Steve Bennett <steveb@workware.net.au>
+ *
+ * Implements the regexp and regsub commands for Jim
+ *
+ * Uses C library regcomp()/regexec() for the matching.
+ *
+ * The FreeBSD license
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * 
+ * The views and conclusions contained in the software and documentation
+ * are those of the authors and should not be interpreted as representing
+ * official policies, either expressed or implied, of the Jim Tcl Project.
+ *
+ * Based on code originally from Tcl 6.7:
+ *
+ * Copyright 1987-1991 Regents of the University of California
+ * Permission to use, copy, modify, and distribute this
+ * software and its documentation for any purpose and without
+ * fee is hereby granted, provided that the above copyright
+ * notice appear in all copies.  The University of California
+ * makes no representations about the suitability of this
+ * software for any purpose.  It is provided "as is" without
+ * express or implied warranty.
+ */
+
+#include <regex.h>
+#include <string.h>
+
+#define JIM_EXTENSION
+#include "jim.h"
+
+/* REVISIT: Would be useful in jim.h */
+static void Jim_SetIntResult(Jim_Interp *interp, jim_wide wide)
+{
+    Jim_SetResult(interp, Jim_NewIntObj(interp, wide));
+}
+
+/**
+ * REVISIT: Should cache a number of compiled regexps for performance reasons.
+ */
+static regex_t * 
+compile_regexp(Jim_Interp *interp, const char *pattern, int flags)
+{
+    int ret;
+
+    regex_t *result = (regex_t *)Jim_Alloc(sizeof(*result));
+
+    if ((ret = regcomp(result, pattern, REG_EXTENDED | flags)) != 0) {
+        char buf[100];
+        regerror(ret, result, buf, sizeof(buf));
+        Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
+        Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't compile regular expression pattern: ", buf, NULL);
+        Jim_Free(result);
+        return NULL;
+    }
+    return result;
+}
+
+int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
+{
+    int opt_indices = 0;
+    int opt_all = 0;
+    int opt_inline = 0;
+    regex_t *regex;
+    int match, i, j;
+    long offset = 0;
+    regmatch_t *pmatch = NULL;
+    int source_len;
+    int result = JIM_OK;
+    const char *pattern;
+    const char *source_str;
+    int num_matches = 0;
+    int num_vars;
+    Jim_Obj *resultListObj = NULL;
+    int regcomp_flags = 0;
+
+    if (argc < 3) {
+        wrongNumArgs:
+        Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?");
+        return JIM_ERR;
+    }
+
+    for (i = 1; i < argc; i++) {
+        if (Jim_CompareStringImmediate(interp, argv[i], "-indices")) {
+            opt_indices = 1;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
+            regcomp_flags |= REG_ICASE;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
+            regcomp_flags |= REG_NEWLINE;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
+            opt_all = 1;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-inline")) {
+            opt_inline = 1;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
+            if (++i == argc) {
+                goto wrongNumArgs;
+            }
+            if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
+                return JIM_ERR;
+            }
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
+            i++;
+            break;
+        }
+        else {
+            const char *opt = Jim_GetString(argv[i], NULL);
+            if (*opt == '-') {
+                /* Bad option */
+                goto wrongNumArgs;
+            }
+            break;
+        }
+    }
+    if (argc - i < 2) {
+        goto wrongNumArgs;
+    }
+
+    pattern = Jim_GetString(argv[i], NULL);
+    regex = compile_regexp(interp, pattern, regcomp_flags);
+    if (regex == NULL) {
+        return JIM_ERR;
+    }
+
+    source_str = Jim_GetString(argv[i + 1], &source_len);
+
+    num_vars = argc - i - 2;
+
+    if (opt_inline) {
+        if (num_vars) {
+            Jim_SetResultString(interp, "regexp match variables not allowed when using -inline", -1);
+            result = JIM_ERR;
+            goto done;
+        }
+        /* REVISIT: Ugly! */
+        num_vars = 100;
+    }
+
+    pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));
+
+    /* If an offset has been specified, adjust for that now.
+     * If it points past the end of the string, point to the terminating null
+     */
+    if (offset) {
+        if (offset > source_len) {
+            source_str += source_len;
+        } else if (offset > 0) {
+            source_str += offset;
+        }
+    }
+
+    if (opt_inline) {
+        resultListObj = Jim_NewListObj(interp, NULL, 0);
+    }
+
+    next_match:
+    match = regexec(regex, source_str, num_vars + 1, pmatch, 0);
+    if (match >= REG_BADPAT) {
+        char buf[100];
+        regerror(match, regex, buf, sizeof(buf));
+        Jim_SetResultString(interp, "", 0);
+        Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL);
+        result = JIM_ERR;
+        goto done;
+    }
+
+    if (match == REG_NOMATCH) {
+        goto done;
+    }
+
+    num_matches++;
+
+    if (opt_all && !opt_inline) {
+        /* Just count the number of matches, so skip the substitution h*/
+        goto try_next_match;
+    }
+
+    /*
+     * If additional variable names have been specified, return
+     * index information in those variables.
+     */
+
+    //fprintf(stderr, "source_str=%s, [0].rm_eo=%d\n", source_str, pmatch[0].rm_eo);
+
+    j = 0;
+    for (i += 2; opt_inline ? pmatch[j].rm_so != -1 : i < argc; i++, j++) {
+        Jim_Obj *resultObj;
+
+        if (opt_indices) {
+            resultObj = Jim_NewListObj(interp, NULL, 0);
+        }
+        else {
+            resultObj = Jim_NewStringObj(interp, "", 0);
+        }
+
+        if (pmatch[j].rm_so == -1) {
+            if (opt_indices) {
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
+            }
+        } else {
+            int len = pmatch[j].rm_eo - pmatch[j].rm_so;
+            if (opt_indices) {
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so));
+                Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so + len - 1));
+            } else {
+                Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
+            }
+        }
+
+        if (opt_inline) {
+            Jim_ListAppendElement(interp, resultListObj, resultObj);
+        }
+        else {
+            /* And now set the result variable */
+            result = Jim_SetVariable(interp, argv[i], resultObj);
+
+            if (result != JIM_OK) {
+                Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
+                Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(argv[i], NULL), "\"", NULL);
+                Jim_FreeObj(interp, resultObj);
+                break;
+            }
+        }
+    }
+
+    try_next_match:
+    if (opt_all && pattern[0] != '^' && *source_str) {
+        if (pmatch[0].rm_eo) {
+            source_str += pmatch[0].rm_eo;
+        }
+        else {
+            source_str++;
+        }
+        if (*source_str) {
+            goto next_match;
+        }
+    }
+
+    done:
+    if (result == JIM_OK) {
+        if (opt_inline) {
+            Jim_SetResult(interp, resultListObj);
+        }
+        else {
+            Jim_SetIntResult(interp, num_matches);
+        }
+    }
+
+    Jim_Free(pmatch);
+    regfree(regex);
+    Jim_Free(regex);
+    return result;
+}
+
+#define MAX_SUB_MATCHES 10
+
+int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
+{
+    int regcomp_flags = 0;
+    int opt_all = 0;
+    long offset = 0;
+    regex_t *regex;
+    const char *p;
+    int result = JIM_ERR;
+    regmatch_t pmatch[MAX_SUB_MATCHES + 1];
+    int num_matches = 0;
+
+    int i;
+    Jim_Obj *varname;
+    Jim_Obj *resultObj;
+    const char *source_str;
+    int source_len;
+    const char *replace_str;
+    const char *pattern;
+
+    if (argc < 5) {
+        wrongNumArgs:
+        Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-all? exp string subSpec varName");
+        return JIM_ERR;
+    }
+
+    for (i = 1; i < argc; i++) {
+        if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
+            regcomp_flags |= REG_ICASE;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
+            regcomp_flags |= REG_NEWLINE;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
+            opt_all = 1;
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
+            if (++i == argc) {
+                goto wrongNumArgs;
+            }
+            if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
+                return JIM_ERR;
+            }
+        }
+        else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
+            i++;
+            break;
+        }
+        else {
+            const char *opt = Jim_GetString(argv[i], NULL);
+            if (*opt == '-') {
+                /* Bad option */
+                goto wrongNumArgs;
+            }
+            break;
+        }
+    }
+    if (argc - i != 4) {
+        goto wrongNumArgs;
+    }
+
+    pattern = Jim_GetString(argv[i], NULL);
+    regex = compile_regexp(interp, pattern, regcomp_flags);
+    if (regex == NULL) {
+        return JIM_ERR;
+    }
+
+    source_str = Jim_GetString(argv[i + 1], &source_len);
+    replace_str = Jim_GetString(argv[i + 2], NULL);
+    varname = argv[i + 3];
+
+    /* Create the result string */
+    resultObj = Jim_NewStringObj(interp, "", 0);
+
+    /* If an offset has been specified, adjust for that now.
+     * If it points past the end of the string, point to the terminating null
+     */
+    if (offset) {
+        if (offset > source_len) {
+            offset = source_len;
+        } else if (offset < 0) {
+            offset = 0;
+        }
+    }
+
+    /* Copy the part before -start */
+    Jim_AppendString(interp, resultObj, source_str, offset);
+
+    /*
+     * The following loop is to handle multiple matches within the
+     * same source string;  each iteration handles one match and its
+     * corresponding substitution.  If "-all" hasn't been specified
+     * then the loop body only gets executed once.
+     */
+
+    for (p = source_str + offset; *p != 0; ) {
+        const char *src;
+        int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, 0);
+        if (match >= REG_BADPAT) {
+            char buf[100];
+            regerror(match, regex, buf, sizeof(buf));
+            Jim_SetResultString(interp, "", 0);
+            Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL);
+            goto done;
+        }
+        if (match == REG_NOMATCH) {
+            break;
+        }
+
+        num_matches++;
+
+        /*
+         * Copy the portion of the source string before the match to the
+         * result variable.
+         */
+        Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);
+
+        /*
+         * Append the subSpec (replace_str) argument to the variable, making appropriate
+         * substitutions.  This code is a bit hairy because of the backslash
+         * conventions and because the code saves up ranges of characters in
+         * subSpec to reduce the number of calls to Jim_SetVar.
+         */
+    
+        for (src = replace_str; *src; src++) {
+            int index;
+            int c = *src;
+
+            if (c == '&') {
+                index = 0;
+            }
+            else if (c == '\\') {
+                c = *++src;
+                if ((c >= '0') && (c <= '9')) {
+                    index = c - '0';
+                }
+                else if ((c == '\\') || (c == '&')) {
+                    Jim_AppendString(interp, resultObj, src, 1);
+                    continue;
+                }
+                else {
+                    Jim_AppendString(interp, resultObj, src - 1, 2);
+                    continue;
+                }
+            }
+            else {
+                Jim_AppendString(interp, resultObj, src, 1);
+                continue;
+            }
+            if ((index < MAX_SUB_MATCHES) && pmatch[index].rm_so != -1 && pmatch[index].rm_eo != -1) {
+                Jim_AppendString(interp, resultObj, p + pmatch[index].rm_so, pmatch[index].rm_eo - pmatch[index].rm_so);
+            }
+        }
+
+        p += pmatch[0].rm_eo;
+
+        if (!opt_all || pmatch[0].rm_eo == 0 || pattern[0] == '^') {
+            /* If we are doing a single match, or we haven't moved with this match
+             * or this is an anchored match, we stop */
+            break;
+        }
+    }
+
+    /*
+     * Copy the portion of the string after the last match to the
+     * result variable.
+     */
+    Jim_AppendString(interp, resultObj, p, -1);
+
+    /* And now set the result variable */
+    result = Jim_SetVariable(interp, varname, resultObj);
+
+    if (result == JIM_OK) {
+        Jim_SetIntResult(interp, num_matches);
+    }
+    else {
+        Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
+        Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(varname, NULL), "\"", NULL);
+        Jim_FreeObj(interp, resultObj);
+    }
+
+    done:
+    regfree(regex);
+    Jim_Free(regex);
+    return result;
+}
+
+int Jim_OnLoad(Jim_Interp *interp)
+{
+        Jim_InitExtension(interp);
+        if (Jim_PackageProvide(interp, "regexp", "1.0", JIM_ERRMSG) != JIM_OK) {
+                return JIM_ERR;
+        }
+        Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL);
+        Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL);
+        return JIM_OK;
+}
diff --git a/test.tcl b/test.tcl
index e425ea3..fc7e670 100644
--- a/test.tcl
+++ b/test.tcl
@@ -1,4 +1,4 @@
-# $Id: test.tcl,v 1.30 2005/04/05 12:18:27 antirez Exp $
+# $Id: test.tcl,v 1.31 2008/11/06 13:31:22 oharboe Exp $
 #
 # This are Tcl tests imported into Jim. Tests that will probably not be passed
 # in the long term are usually removed (for example all the tests about
@@ -4044,6 +4044,581 @@ test scan-13.8 {Tcl_ScanObjCmd, inline XPG case lots of arguments} {
 } {200 10 20 30}
 
 ################################################################################
+# REGEXP and REGSUB
+################################################################################
+
+catch {package require regexp}
+
+test regexp-1.1 {basic regexp operation} {
+    regexp ab*c abbbc
+} {1}
+
+test regexp-1.2 {basic regexp operation} {
+    regexp ab*c ac
+} {1}
+
+test regexp-1.3 {basic regexp operation} {
+    regexp ab*c ab
+} {0}
+
+test regexp-1.4 {basic regexp operation} {
+    regexp -- -gorp abc-gorpxxx
+} {1}
+
+test regexp-1.5 {basic regexp operation} {
+    regexp {^([^ ]*)[ ]*([^ ]*)} "" a
+} {1}
+
+test regexp-1.6 {basic regexp operation} {
+    list [catch {regexp {} abc} msg] $msg
+} {0 1}
+
+test regexp-2.1 {getting substrings back from regexp} {
+    set foo {}
+    list [regexp ab*c abbbbc foo] $foo
+} {1 abbbbc}
+
+test regexp-2.2 {getting substrings back from regexp} {
+    set foo {}
+    set f2 {}
+    list [regexp a(b*)c abbbbc foo f2] $foo $f2
+} {1 abbbbc bbbb}
+
+test regexp-2.3 {getting substrings back from regexp} {
+    set foo {}
+    set f2 {}
+    list [regexp a(b*)(c) abbbbc foo f2] $foo $f2
+} {1 abbbbc bbbb}
+
+test regexp-2.4 {getting substrings back from regexp} {
+    set foo {}
+    set f2 {}
+    set f3 {}
+    list [regexp a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3
+} {1 abbbbc bbbb c}
+
+test regexp-2.5 {getting substrings back from regexp} {
+    set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {};
+    set f6 {}; set f7 {}; set f8 {}; set f9 {}; set fa {}; set fb {};
+    list [regexp (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*)(a*)(b*) \
+	      12223345556789999aabbb \
+	    foo f1 f2 f3 f4 f5 f6 f7 f8 f9 fa fb] $foo $f1 $f2 $f3 $f4 $f5 \
+	    $f6 $f7 $f8 $f9 $fa $fb
+} {1 12223345556789999aabbb 1 222 33 4 555 6 7 8 9999 aa bbb}
+
+test regexp-2.6 {getting substrings back from regexp} {
+    set foo 2; set f2 2; set f3 2; set f4 2
+    list [regexp (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4
+} {1 a a {} {}}
+
+test regexp-2.7 {getting substrings back from regexp} {
+    set foo 1; set f2 1; set f3 1; set f4 1
+    list [regexp (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4
+} {1 ac a {} c}
+
+test regexp-2.8 {getting substrings back from regexp} {
+    set match {}
+    list [regexp {^a*b} aaaab match] $match
+} {1 aaaab}
+
+test regexp-3.1 {-indices option to regexp} {
+    set foo {}
+    list [regexp -indices ab*c abbbbc foo] $foo
+} {1 {0 5}}
+
+test regexp-3.2 {-indices option to regexp} {
+    set foo {}
+    set f2 {}
+    list [regexp -indices a(b*)c abbbbc foo f2] $foo $f2
+} {1 {0 5} {1 4}}
+
+test regexp-3.3 {-indices option to regexp} {
+    set foo {}
+    set f2 {}
+    list [regexp -indices a(b*)(c) abbbbc foo f2] $foo $f2
+} {1 {0 5} {1 4}}
+
+test regexp-3.4 {-indices option to regexp} {
+    set foo {}
+    set f2 {}
+    set f3 {}
+    list [regexp -indices a(b*)(c) abbbbc foo f2 f3] $foo $f2 $f3
+} {1 {0 5} {1 4} {5 5}}
+
+test regexp-3.5 {-indices option to regexp} {
+    set foo {}; set f1 {}; set f2 {}; set f3 {}; set f4 {}; set f5 {};
+    set f6 {}; set f7 {}; set f8 {}; set f9 {}
+    list [regexp -indices (1*)(2*)(3*)(4*)(5*)(6*)(7*)(8*)(9*) \
+	    12223345556789999 \
+	    foo f1 f2 f3 f4 f5 f6 f7 f8 f9] $foo $f1 $f2 $f3 $f4 $f5 \
+	    $f6 $f7 $f8 $f9
+} {1 {0 16} {0 0} {1 3} {4 5} {6 6} {7 9} {10 10} {11 11} {12 12} {13 16}}
+
+test regexp-3.6 {getting substrings back from regexp} {
+    set foo 2; set f2 2; set f3 2; set f4 2
+    list [regexp -indices (a)(b)? xay foo f2 f3 f4] $foo $f2 $f3 $f4
+} {1 {1 1} {1 1} {-1 -1} {-1 -1}}
+
+test regexp-3.7 {getting substrings back from regexp} {
+    set foo 1; set f2 1; set f3 1; set f4 1
+    list [regexp -indices (a)(b)?(c) xacy foo f2 f3 f4] $foo $f2 $f3 $f4
+} {1 {1 2} {1 1} {-1 -1} {2 2}}
+
+test regexp-4.1 {-nocase option to regexp} {
+    regexp -nocase foo abcFOo
+} {1}
+
+test regexp-4.2 {-nocase option to regexp} {
+    set f1 22
+    set f2 33
+    set f3 44
+    list [regexp -nocase {a(b*)([xy]*)z} aBbbxYXxxZ22 f1 f2 f3] $f1 $f2 $f3
+} {1 aBbbxYXxxZ Bbb xYXxx}
+
+test regexp-4.3 {-nocase option to regexp} {
+    regexp -nocase FOo abcFOo
+} {1}
+
+test regexp-4.4 {case conversion in regexp} {
+    set x abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890
+    list [regexp -nocase $x $x foo] $foo
+} {1 abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklmnopqrstuvwxyz1234567890}
+
+test regexp-5.1 {exercise cache of compiled expressions} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    regexp .*a bbba
+} {1}
+
+test regexp-5.2 {exercise cache of compiled expressions} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    regexp .*b xxxb
+} {1}
+
+test regexp-5.3 {exercise cache of compiled expressions} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    regexp .*c yyyc
+} {1}
+
+test regexp-5.4 {exercise cache of compiled expressions} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    regexp .*d 1d
+} {1}
+
+test regexp-5.5 {exercise cache of compiled expressions} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    regexp .*e xe
+} {1}
+
+test regexp-6.1 {regexp errors} {
+    list [catch {regexp a} msg] $msg
+} {1 {wrong # args: should be "regexp ?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?"}}
+
+test regexp-6.2 {regexp errors} {
+    list [catch {regexp -nocase a} msg] $msg
+} {1 {wrong # args: should be "regexp ?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?"}}
+
+test regexp-6.3 {regexp errors} {
+    list [catch {regexp -gorp a} msg] $msg
+} {1 {wrong # args: should be "regexp ?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?"}}
+
+test regexp-6.4 {regexp errors} {
+    list [catch {regexp a( b} msg] $msg
+} {1 {couldn't compile regular expression pattern: parentheses not balanced}}
+
+test regexp-6.5 {regexp errors} {
+    list [catch {regexp a( b} msg] $msg
+} {1 {couldn't compile regular expression pattern: parentheses not balanced}}
+
+test regexp-6.6 {regexp errors} {
+    list [catch {regexp a a f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1 f1} msg] $msg
+} {0 1}
+
+test regexp-6.7 {regexp errors} {
+    list [catch {regexp (x)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) xyzzy} msg] $msg
+} {0 0}
+
+test regexp-6.8 {regexp errors} {
+    catch {unset f1}
+    set f1 44
+    list [catch {regexp abc abc f1(f2)} msg] $msg
+} {1 {couldn't set variable "f1(f2)"}}
+
+test regexp-6.9 {regexp errors, -start bad int check} {
+    list [catch {regexp -start bogus {^$} {}} msg] $msg
+} {1 {expected integer but got "bogus"}}
+
+test regexp-7.1 {basic regsub operation} {
+    list [regsub aa+ xaxaaaxaa 111&222 foo] $foo
+} {1 xax111aaa222xaa}
+
+test regexp-7.2 {basic regsub operation} {
+    list [regsub aa+ aaaxaa &111 foo] $foo
+} {1 aaa111xaa}
+
+test regexp-7.3 {basic regsub operation} {
+    list [regsub aa+ xaxaaa 111& foo] $foo
+} {1 xax111aaa}
+
+test regexp-7.4 {basic regsub operation} {
+    list [regsub aa+ aaa 11&2&333 foo] $foo
+} {1 11aaa2aaa333}
+
+test regexp-7.5 {basic regsub operation} {
+    list [regsub aa+ xaxaaaxaa &2&333 foo] $foo
+} {1 xaxaaa2aaa333xaa}
+
+test regexp-7.6 {basic regsub operation} {
+    list [regsub aa+ xaxaaaxaa 1&22& foo] $foo
+} {1 xax1aaa22aaaxaa}
+
+test regexp-7.7 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {1\122\1} foo] $foo
+} {1 xax1aa22aaxaa}
+
+test regexp-7.8 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {1\\\122\1} foo] $foo
+} {1 {xax1\aa22aaxaa}}
+
+test regexp-7.9 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {1\\122\1} foo] $foo
+} {1 {xax1\122aaxaa}}
+
+test regexp-7.10 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {1\\&\1} foo] $foo
+} {1 {xax1\aaaaaxaa}}
+
+test regexp-7.11 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {1\&\1} foo] $foo
+} {1 xax1&aaxaa}
+
+test regexp-7.12 {basic regsub operation} {
+    list [regsub a(a+) xaxaaaxaa {\1\1\1\1&&} foo] $foo
+} {1 xaxaaaaaaaaaaaaaaxaa}
+
+test regexp-7.13 {basic regsub operation} {
+    set foo xxx
+    list [regsub abc xyz 111 foo] $foo
+} {0 xyz}
+
+test regexp-7.14 {basic regsub operation} {
+    set foo xxx
+    list [regsub ^ xyz "111 " foo] $foo
+} {1 {111 xyz}}
+
+test regexp-7.15 {basic regsub operation} {
+    set foo xxx
+    list [regsub -- -foo abc-foodef "111 " foo] $foo
+} {1 {abc111 def}}
+
+test regexp-7.16 {basic regsub operation} {
+    set foo xxx
+    list [regsub x "" y foo] $foo
+} {0 {}}
+
+test regexp-8.1 {case conversion in regsub} {
+    list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
+} {1 xaAAaAAay}
+
+test regexp-8.2 {case conversion in regsub} {
+    list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
+} {1 xaAAaAAay}
+
+test regexp-8.3 {case conversion in regsub} {
+    set foo 123
+    list [regsub a(a+) xaAAaAAay & foo] $foo
+} {0 xaAAaAAay}
+
+test regexp-8.4 {case conversion in regsub} {
+    set foo 123
+    list [regsub -nocase a CaDE b foo] $foo
+} {1 CbDE}
+
+test regexp-8.5 {case conversion in regsub} {
+    set foo 123
+    list [regsub -nocase XYZ CxYzD b foo] $foo
+} {1 CbD}
+
+test regexp-8.6 {case conversion in regsub} {
+    set x abcdefghijklmnopqrstuvwxyz1234567890
+    set x $x$x$x$x$x$x$x$x$x$x$x$x
+    set foo 123
+    list [regsub -nocase $x $x b foo] $foo
+} {1 b}
+
+test regexp-9.1 {-all option to regsub} {
+    set foo 86
+    list [regsub -all x+ axxxbxxcxdx |&| foo] $foo
+} {4 a|xxx|b|xx|c|x|d|x|}
+
+test regexp-9.2 {-all option to regsub} {
+    set foo 86
+    list [regsub -nocase -all x+ aXxXbxxcXdx |&| foo] $foo
+} {4 a|XxX|b|xx|c|X|d|x|}
+
+test regexp-9.3 {-all option to regsub} {
+    set foo 86
+    list [regsub x+ axxxbxxcxdx |&| foo] $foo
+} {1 a|xxx|bxxcxdx}
+
+test regexp-9.4 {-all option to regsub} {
+    set foo 86
+    list [regsub -all bc axxxbxxcxdx |&| foo] $foo
+} {0 axxxbxxcxdx}
+
+test regexp-9.5 {-all option to regsub} {
+    set foo xxx
+    list [regsub -all node "node node more" yy foo] $foo
+} {2 {yy yy more}}
+
+test regexp-9.6 {-all option to regsub} {
+    set foo xxx
+    list [regsub -all ^ xxx 123 foo] $foo
+} {1 123xxx}
+
+test regexp-10.2 {newline sensitivity in regsub} {
+    set foo xxx
+    list [regsub -line {^a.*b$} "dabc\naxyb\n" 123 foo] $foo
+} {1 {dabc
+123
+}}
+
+test regexp-10.3 {newline sensitivity in regsub} {
+    set foo xxx
+    list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
+} {1 {dabc
+123
+xb}}
+
+test regexp-11.1 {regsub errors} {
+    list [catch {regsub a b c} msg] $msg
+} {1 {wrong # args: should be "regsub ?-nocase? ?-all? exp string subSpec varName"}}
+
+test regexp-11.2 {regsub errors} {
+    list [catch {regsub -nocase a b c} msg] $msg
+} {1 {wrong # args: should be "regsub ?-nocase? ?-all? exp string subSpec varName"}}
+
+test regexp-11.3 {regsub errors} {
+    list [catch {regsub -nocase -all a b c} msg] $msg
+} {1 {wrong # args: should be "regsub ?-nocase? ?-all? exp string subSpec varName"}}
+
+test regexp-11.4 {regsub errors} {
+    list [catch {regsub a b c d e f} msg] $msg
+} {1 {wrong # args: should be "regsub ?-nocase? ?-all? exp string subSpec varName"}}
+
+test regexp-11.5 {regsub errors} {
+    list [catch {regsub -gorp a b c} msg] $msg
+} {1 {wrong # args: should be "regsub ?-nocase? ?-all? exp string subSpec varName"}}
+
+test regexp-11.6 {regsub errors} {
+    list [catch {regsub -nocase a( b c d} msg] $msg
+} {1 {couldn't compile regular expression pattern: parentheses not balanced}}
+
+test regexp-11.7 {regsub errors} {
+    catch {unset f1}
+    set f1 44
+    list [catch {regsub -nocase aaa aaa xxx f1(f2)} msg] $msg
+} {1 {couldn't set variable "f1(f2)"}}
+
+test regexp-11.8 {regsub errors, -start bad int check} {
+    list [catch {regsub -start bogus pattern string rep var} msg] $msg
+} {1 {expected integer but got "bogus"}}
+
+test regexp-12.1 {Tcl_RegExpExec: large number of subexpressions} {
+    list [regexp (.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)(.) abcdefghijklmnopqrstuvwxyz all a b c d e f g h i j k l m n o p q r s t u v w x y z] $all $a $b $c $d $e $f $g $h $i $j $k $l $m $n $o $p $q $r $s $t $u $v $w $x $y $z
+} {1 abcdefghijklmnopqrstuvwxyz a b c d e f g h i j k l m n o p q r s t u v w x y z}
+
+test regexp-13.1 {regsub of a very large string} {
+    # This test is designed to stress the memory subsystem in order
+    # to catch Bug #933.  It only fails if the Tcl memory allocator
+    # is in use.
+
+    set line {BEGIN_TABLE ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; END_TABLE}
+    set filedata [string repeat $line 200]
+    for {set i 1} {$i<10} {incr i} {
+	regsub -all "BEGIN_TABLE " $filedata "" newfiledata
+    }
+    set x done
+} {done}
+
+test regexp-14.1 {CompileRegexp: regexp cache} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    set x .
+    append x *a
+    regexp $x bbba
+} {1}
+
+test regexp-14.2 {CompileRegexp: regexp cache, different flags} {
+    regexp .*a b
+    regexp .*b c
+    regexp .*c d
+    regexp .*d e
+    regexp .*e f
+    set x .
+    append x *a
+    regexp -nocase $x bbba
+} {1}
+
+test regexp-15.1 {regexp -start} {
+    catch {unset x}
+    list [regexp -start -10 {[0-9]} 1abc2de3 x] $x
+} {1 1}
+
+test regexp-15.2 {regexp -start} {
+    catch {unset x}
+    list [regexp -start 2 {[0-9]} 1abc2de3 x] $x
+} {1 2}
+
+test regexp-15.3 {regexp -start} {
+    catch {unset x}
+    list [regexp -start 4 {[0-9]} 1abc2de3 x] $x
+} {1 2}
+
+test regexp-15.4 {regexp -start} {
+    catch {unset x}
+    list [regexp -start 5 {[0-9]} 1abc2de3 x] $x
+} {1 3}
+
+test regexp-15.5 {regexp -start, over end of string} {
+    catch {unset x}
+    list [regexp -start [string length 1abc2de3] {[0-9]} 1abc2de3 x] [info exists x]
+} {0 0}
+
+test regexp-15.6 {regexp -start, loss of ^$ behavior} {
+    list [regexp -start 2 {^$} {}]
+} {1}
+
+test regexp-16.1 {regsub -start} {
+    catch {unset x}
+    list [regsub -all -start 2 {[0-9]} a1b2c3d4e5 {/&} x] $x
+} {4 a1b/2c/3d/4e/5}
+
+test regexp-16.2 {regsub -start} {
+    catch {unset x}
+    list [regsub -all -start -25 {z} hello {/&} x] $x
+} {0 hello}
+
+test regexp-16.3 {regsub -start} {
+    catch {unset x}
+    list [regsub -all -start 3 {z} hello {/&} x] $x
+} {0 hello}
+
+test regexp-17.1 {regexp -inline} {
+    regexp -inline b ababa
+} {b}
+
+test regexp-17.2 {regexp -inline} {
+    regexp -inline (b) ababa
+} {b b}
+
+test regexp-17.3 {regexp -inline -indices} {
+    regexp -inline -indices (b) ababa
+} {{1 1} {1 1}}
+
+test regexp-17.4 {regexp -inline} {
+    regexp -inline {[[:alnum:]_]([0-9]+)[[:alnum:]_]} "   hello 23 there456def "
+} {e456d 456}
+
+test regexp-17.5 {regexp -inline no matches} {
+    regexp -inline {[[:alnum:]_]([0-9]+)[[:alnum:]_]} ""
+} {}
+
+test regexp-17.6 {regexp -inline no matches} {
+    regexp -inline hello goodbye
+} {}
+
+test regexp-17.7 {regexp -inline, no matchvars allowed} {
+    list [catch {regexp -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
+
+test regexp-18.1 {regexp -all} {
+    regexp -all b bbbbb
+} {5}
+
+test regexp-18.2 {regexp -all} {
+    regexp -all b abababbabaaaaaaaaaab
+} {6}
+
+test regexp-18.3 {regexp -all -inline} {
+    regexp -all -inline b abababbabaaaaaaaaaab
+} {b b b b b b}
+
+test regexp-18.4 {regexp -all -inline} {
+    regexp -all -inline {[[:alnum:]_]([[:alnum:]_])} abcdefg
+} {ab b cd d ef f}
+
+test regexp-18.5 {regexp -all -inline} {
+    regexp -all -inline {[[:alnum:]_]([[:alnum:]_])$} abcdefg
+} {fg g}
+
+test regexp-18.6 {regexp -all -inline} {
+    regexp -all -inline {[0-9]+} 10:20:30:40
+} {10 20 30 40}
+
+test regexp-18.7 {regexp -all -inline} {
+    list [catch {regexp -all -inline b abc match} msg] $msg
+} {1 {regexp match variables not allowed when using -inline}}
+
+test regexp-18.8 {regexp -all} {
+    # This should not cause an infinite loop
+    regexp -all -inline {a*} a
+} {a}
+
+test regexp-18.9 {regexp -all} {
+    # Yes, the expected result is {a {}}.  Here's why:
+    # Start at index 0; a* matches the "a" there then stops.
+    # Go to index 1; a* matches the lambda (or {}) there then stops.  Recall
+    #   that a* matches zero or more "a"'s; thus it matches the string "b", as
+    #   there are zero or more "a"'s there.
+    # Go to index 2; this is past the end of the string, so stop.
+    regexp -all -inline {a*} ab
+} {a {}}
+
+test regexp-18.10 {regexp -all} {
+    # Yes, the expected result is {a {} a}.  Here's why:
+    # Start at index 0; a* matches the "a" there then stops.
+    # Go to index 1; a* matches the lambda (or {}) there then stops.   Recall
+    #   that a* matches zero or more "a"'s; thus it matches the string "b", as
+    #   there are zero or more "a"'s there.
+    # Go to index 2; a* matches the "a" there then stops.
+    # Go to index 3; this is past the end of the string, so stop.
+    regexp -all -inline {a*} aba
+} {a {} a}
+
+test regexp-18.11 {regexp -all} {
+    regexp -all -inline {^a} aaaa
+} {a}
+
+test regexp-19.1 {regsub null replacement} {
+    regsub -all {@} {@hel@lo@} "\0a\0" result
+    list $result [string length $result]
+} {hello 5}
+
+
+################################################################################
 # RANGE
 ################################################################################
author	oharboe <oharboe>	2008-11-06 13:31:22 +0000
committer	oharboe <oharboe>	2008-11-06 13:31:22 +0000
commit	fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca (patch)
tree	67b78310c9c0cc52b05d12710830c4016c09db9f
parent	5ccb870e18de05cbb90a0c1b90bd647f3b30b94c (diff)
download	jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.zip jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.tar.gz jimtcl-fefae9375a3cdec3d39a36ccc9754e5d5a7c4cca.tar.bz2