diff options
author | Steve Bennett <steveb@workware.net.au> | 2009-07-28 17:25:37 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2010-10-15 10:11:02 +1000 |
commit | b08812254a5da3f3d3331b05962d479db2aab314 (patch) | |
tree | 5a84fe7674221046106113c30150490c2826010e /jim-regexp.c | |
parent | 6461e8bf6ddf82ee974b4c63458915490d74a5ce (diff) | |
download | jimtcl-b08812254a5da3f3d3331b05962d479db2aab314.zip jimtcl-b08812254a5da3f3d3331b05962d479db2aab314.tar.gz jimtcl-b08812254a5da3f3d3331b05962d479db2aab314.tar.bz2 |
Convert package to use jim-subcmd
Also, no need to install tcl extensions
Display any errors from loading static extensions
Diffstat (limited to 'jim-regexp.c')
-rw-r--r-- | jim-regexp.c | 962 |
1 files changed, 481 insertions, 481 deletions
diff --git a/jim-regexp.c b/jim-regexp.c index 051d060..e3c194e 100644 --- a/jim-regexp.c +++ b/jim-regexp.c @@ -1,481 +1,481 @@ -/*
- * (c) 2008 Steve Bennett <steveb@workware.net.au>
- *
- * Implements the regexp and regsub commands for Jim
- *
- * Uses C library regcomp()/regexec() for the matching.
- *
- * The FreeBSD license
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * The views and conclusions contained in the software and documentation
- * are those of the authors and should not be interpreted as representing
- * official policies, either expressed or implied, of the Jim Tcl Project.
- *
- * Based on code originally from Tcl 6.7:
- *
- * Copyright 1987-1991 Regents of the University of California
- * Permission to use, copy, modify, and distribute this
- * software and its documentation for any purpose and without
- * fee is hereby granted, provided that the above copyright
- * notice appear in all copies. The University of California
- * makes no representations about the suitability of this
- * software for any purpose. It is provided "as is" without
- * express or implied warranty.
- */
-
-#include <regex.h>
-#include <string.h>
-
-#define JIM_EXTENSION
-#include "jim.h"
-
-/* REVISIT: Would be useful in jim.h */
-static void Jim_SetIntResult(Jim_Interp *interp, jim_wide wide)
-{
- Jim_SetResult(interp, Jim_NewIntObj(interp, wide));
-}
-
-/**
- * REVISIT: Should cache a number of compiled regexps for performance reasons.
- */
-static regex_t *
-compile_regexp(Jim_Interp *interp, const char *pattern, int flags)
-{
- int ret;
-
- regex_t *result = (regex_t *)Jim_Alloc(sizeof(*result));
-
- if ((ret = regcomp(result, pattern, REG_EXTENDED | flags)) != 0) {
- char buf[100];
- regerror(ret, result, buf, sizeof(buf));
- Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
- Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't compile regular expression pattern: ", buf, NULL);
- Jim_Free(result);
- return NULL;
- }
- return result;
-}
-
-int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
-{
- int opt_indices = 0;
- int opt_all = 0;
- int opt_inline = 0;
- regex_t *regex;
- int match, i, j;
- long offset = 0;
- regmatch_t *pmatch = NULL;
- int source_len;
- int result = JIM_OK;
- const char *pattern;
- const char *source_str;
- int num_matches = 0;
- int num_vars;
- Jim_Obj *resultListObj = NULL;
- int regcomp_flags = 0;
-
- if (argc < 3) {
- wrongNumArgs:
- Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?");
- return JIM_ERR;
- }
-
- for (i = 1; i < argc; i++) {
- if (Jim_CompareStringImmediate(interp, argv[i], "-indices")) {
- opt_indices = 1;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
- regcomp_flags |= REG_ICASE;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
- regcomp_flags |= REG_NEWLINE;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
- opt_all = 1;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-inline")) {
- opt_inline = 1;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
- if (++i == argc) {
- goto wrongNumArgs;
- }
- if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
- return JIM_ERR;
- }
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
- i++;
- break;
- }
- else {
- const char *opt = Jim_GetString(argv[i], NULL);
- if (*opt == '-') {
- /* Bad option */
- goto wrongNumArgs;
- }
- break;
- }
- }
- if (argc - i < 2) {
- goto wrongNumArgs;
- }
-
- pattern = Jim_GetString(argv[i], NULL);
- regex = compile_regexp(interp, pattern, regcomp_flags);
- if (regex == NULL) {
- return JIM_ERR;
- }
-
- source_str = Jim_GetString(argv[i + 1], &source_len);
-
- num_vars = argc - i - 2;
-
- if (opt_inline) {
- if (num_vars) {
- Jim_SetResultString(interp, "regexp match variables not allowed when using -inline", -1);
- result = JIM_ERR;
- goto done;
- }
- /* REVISIT: Ugly! */
- num_vars = 100;
- }
-
- pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch));
-
- /* If an offset has been specified, adjust for that now.
- * If it points past the end of the string, point to the terminating null
- */
- if (offset) {
- if (offset > source_len) {
- source_str += source_len;
- } else if (offset > 0) {
- source_str += offset;
- }
- }
-
- if (opt_inline) {
- resultListObj = Jim_NewListObj(interp, NULL, 0);
- }
-
- next_match:
- match = regexec(regex, source_str, num_vars + 1, pmatch, 0);
- if (match >= REG_BADPAT) {
- char buf[100];
- regerror(match, regex, buf, sizeof(buf));
- Jim_SetResultString(interp, "", 0);
- Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL);
- result = JIM_ERR;
- goto done;
- }
-
- if (match == REG_NOMATCH) {
- goto done;
- }
-
- num_matches++;
-
- if (opt_all && !opt_inline) {
- /* Just count the number of matches, so skip the substitution h*/
- goto try_next_match;
- }
-
- /*
- * If additional variable names have been specified, return
- * index information in those variables.
- */
-
- //fprintf(stderr, "source_str=%s, [0].rm_eo=%d\n", source_str, pmatch[0].rm_eo);
-
- j = 0;
- for (i += 2; opt_inline ? pmatch[j].rm_so != -1 : i < argc; i++, j++) {
- Jim_Obj *resultObj;
-
- if (opt_indices) {
- resultObj = Jim_NewListObj(interp, NULL, 0);
- }
- else {
- resultObj = Jim_NewStringObj(interp, "", 0);
- }
-
- if (pmatch[j].rm_so == -1) {
- if (opt_indices) {
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1));
- }
- } else {
- int len = pmatch[j].rm_eo - pmatch[j].rm_so;
- if (opt_indices) {
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so));
- Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so + len - 1));
- } else {
- Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len);
- }
- }
-
- if (opt_inline) {
- Jim_ListAppendElement(interp, resultListObj, resultObj);
- }
- else {
- /* And now set the result variable */
- result = Jim_SetVariable(interp, argv[i], resultObj);
-
- if (result != JIM_OK) {
- Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
- Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(argv[i], NULL), "\"", NULL);
- Jim_FreeObj(interp, resultObj);
- break;
- }
- }
- }
-
- try_next_match:
- if (opt_all && pattern[0] != '^' && *source_str) {
- if (pmatch[0].rm_eo) {
- source_str += pmatch[0].rm_eo;
- }
- else {
- source_str++;
- }
- if (*source_str) {
- goto next_match;
- }
- }
-
- done:
- if (result == JIM_OK) {
- if (opt_inline) {
- Jim_SetResult(interp, resultListObj);
- }
- else {
- Jim_SetIntResult(interp, num_matches);
- }
- }
-
- Jim_Free(pmatch);
- regfree(regex);
- Jim_Free(regex);
- return result;
-}
-
-#define MAX_SUB_MATCHES 10
-
-int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
-{
- int regcomp_flags = 0;
- int opt_all = 0;
- long offset = 0;
- regex_t *regex;
- const char *p;
- int result = JIM_ERR;
- regmatch_t pmatch[MAX_SUB_MATCHES + 1];
- int num_matches = 0;
-
- int i;
- Jim_Obj *varname;
- Jim_Obj *resultObj;
- const char *source_str;
- int source_len;
- const char *replace_str;
- const char *pattern;
-
- if (argc < 5) {
- wrongNumArgs:
- Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-all? exp string subSpec varName");
- return JIM_ERR;
- }
-
- for (i = 1; i < argc; i++) {
- if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) {
- regcomp_flags |= REG_ICASE;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) {
- regcomp_flags |= REG_NEWLINE;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) {
- opt_all = 1;
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) {
- if (++i == argc) {
- goto wrongNumArgs;
- }
- if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) {
- return JIM_ERR;
- }
- }
- else if (Jim_CompareStringImmediate(interp, argv[i], "--")) {
- i++;
- break;
- }
- else {
- const char *opt = Jim_GetString(argv[i], NULL);
- if (*opt == '-') {
- /* Bad option */
- goto wrongNumArgs;
- }
- break;
- }
- }
- if (argc - i != 4) {
- goto wrongNumArgs;
- }
-
- pattern = Jim_GetString(argv[i], NULL);
- regex = compile_regexp(interp, pattern, regcomp_flags);
- if (regex == NULL) {
- return JIM_ERR;
- }
-
- source_str = Jim_GetString(argv[i + 1], &source_len);
- replace_str = Jim_GetString(argv[i + 2], NULL);
- varname = argv[i + 3];
-
- /* Create the result string */
- resultObj = Jim_NewStringObj(interp, "", 0);
-
- /* If an offset has been specified, adjust for that now.
- * If it points past the end of the string, point to the terminating null
- */
- if (offset) {
- if (offset > source_len) {
- offset = source_len;
- } else if (offset < 0) {
- offset = 0;
- }
- }
-
- /* Copy the part before -start */
- Jim_AppendString(interp, resultObj, source_str, offset);
-
- /*
- * The following loop is to handle multiple matches within the
- * same source string; each iteration handles one match and its
- * corresponding substitution. If "-all" hasn't been specified
- * then the loop body only gets executed once.
- */
-
- for (p = source_str + offset; *p != 0; ) {
- const char *src;
- int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, 0);
- if (match >= REG_BADPAT) {
- char buf[100];
- regerror(match, regex, buf, sizeof(buf));
- Jim_SetResultString(interp, "", 0);
- Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL);
- goto done;
- }
- if (match == REG_NOMATCH) {
- break;
- }
-
- num_matches++;
-
- /*
- * Copy the portion of the source string before the match to the
- * result variable.
- */
- Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so);
-
- /*
- * Append the subSpec (replace_str) argument to the variable, making appropriate
- * substitutions. This code is a bit hairy because of the backslash
- * conventions and because the code saves up ranges of characters in
- * subSpec to reduce the number of calls to Jim_SetVar.
- */
-
- for (src = replace_str; *src; src++) {
- int index;
- int c = *src;
-
- if (c == '&') {
- index = 0;
- }
- else if (c == '\\') {
- c = *++src;
- if ((c >= '0') && (c <= '9')) {
- index = c - '0';
- }
- else if ((c == '\\') || (c == '&')) {
- Jim_AppendString(interp, resultObj, src, 1);
- continue;
- }
- else {
- Jim_AppendString(interp, resultObj, src - 1, 2);
- continue;
- }
- }
- else {
- Jim_AppendString(interp, resultObj, src, 1);
- continue;
- }
- if ((index < MAX_SUB_MATCHES) && pmatch[index].rm_so != -1 && pmatch[index].rm_eo != -1) {
- Jim_AppendString(interp, resultObj, p + pmatch[index].rm_so, pmatch[index].rm_eo - pmatch[index].rm_so);
- }
- }
-
- p += pmatch[0].rm_eo;
-
- if (!opt_all || pmatch[0].rm_eo == 0 || pattern[0] == '^') {
- /* If we are doing a single match, or we haven't moved with this match
- * or this is an anchored match, we stop */
- break;
- }
- }
-
- /*
- * Copy the portion of the string after the last match to the
- * result variable.
- */
- Jim_AppendString(interp, resultObj, p, -1);
-
- /* And now set the result variable */
- result = Jim_SetVariable(interp, varname, resultObj);
-
- if (result == JIM_OK) {
- Jim_SetIntResult(interp, num_matches);
- }
- else {
- Jim_SetResult(interp, Jim_NewEmptyStringObj(interp));
- Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(varname, NULL), "\"", NULL);
- Jim_FreeObj(interp, resultObj);
- }
-
- done:
- regfree(regex);
- Jim_Free(regex);
- return result;
-}
-
-int Jim_regexpInit(Jim_Interp *interp)
-{
- if (Jim_PackageProvide(interp, "regexp", "1.0", JIM_ERRMSG) != JIM_OK) {
- return JIM_ERR;
- }
- Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL);
- Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL);
- return JIM_OK;
-}
+/* + * (c) 2008 Steve Bennett <steveb@workware.net.au> + * + * Implements the regexp and regsub commands for Jim + * + * Uses C library regcomp()/regexec() for the matching. + * + * The FreeBSD license + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE JIM TCL PROJECT ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * JIM TCL PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * The views and conclusions contained in the software and documentation + * are those of the authors and should not be interpreted as representing + * official policies, either expressed or implied, of the Jim Tcl Project. + * + * Based on code originally from Tcl 6.7: + * + * Copyright 1987-1991 Regents of the University of California + * Permission to use, copy, modify, and distribute this + * software and its documentation for any purpose and without + * fee is hereby granted, provided that the above copyright + * notice appear in all copies. The University of California + * makes no representations about the suitability of this + * software for any purpose. It is provided "as is" without + * express or implied warranty. + */ + +#include <regex.h> +#include <string.h> + +#define JIM_EXTENSION +#include "jim.h" + +/* REVISIT: Would be useful in jim.h */ +static void Jim_SetIntResult(Jim_Interp *interp, jim_wide wide) +{ + Jim_SetResult(interp, Jim_NewIntObj(interp, wide)); +} + +/** + * REVISIT: Should cache a number of compiled regexps for performance reasons. + */ +static regex_t * +compile_regexp(Jim_Interp *interp, const char *pattern, int flags) +{ + int ret; + + regex_t *result = (regex_t *)Jim_Alloc(sizeof(*result)); + + if ((ret = regcomp(result, pattern, REG_EXTENDED | flags)) != 0) { + char buf[100]; + regerror(ret, result, buf, sizeof(buf)); + Jim_SetResult(interp, Jim_NewEmptyStringObj(interp)); + Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't compile regular expression pattern: ", buf, NULL); + Jim_Free(result); + return NULL; + } + return result; +} + +int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) +{ + int opt_indices = 0; + int opt_all = 0; + int opt_inline = 0; + regex_t *regex; + int match, i, j; + long offset = 0; + regmatch_t *pmatch = NULL; + int source_len; + int result = JIM_OK; + const char *pattern; + const char *source_str; + int num_matches = 0; + int num_vars; + Jim_Obj *resultListObj = NULL; + int regcomp_flags = 0; + + if (argc < 3) { + wrongNumArgs: + Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-line? ?-indices? ?-start offset? ?-all? ?-inline? exp string ?matchVar? ?subMatchVar ...?"); + return JIM_ERR; + } + + for (i = 1; i < argc; i++) { + if (Jim_CompareStringImmediate(interp, argv[i], "-indices")) { + opt_indices = 1; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) { + regcomp_flags |= REG_ICASE; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) { + regcomp_flags |= REG_NEWLINE; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) { + opt_all = 1; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-inline")) { + opt_inline = 1; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) { + if (++i == argc) { + goto wrongNumArgs; + } + if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) { + return JIM_ERR; + } + } + else if (Jim_CompareStringImmediate(interp, argv[i], "--")) { + i++; + break; + } + else { + const char *opt = Jim_GetString(argv[i], NULL); + if (*opt == '-') { + /* Bad option */ + goto wrongNumArgs; + } + break; + } + } + if (argc - i < 2) { + goto wrongNumArgs; + } + + pattern = Jim_GetString(argv[i], NULL); + regex = compile_regexp(interp, pattern, regcomp_flags); + if (regex == NULL) { + return JIM_ERR; + } + + source_str = Jim_GetString(argv[i + 1], &source_len); + + num_vars = argc - i - 2; + + if (opt_inline) { + if (num_vars) { + Jim_SetResultString(interp, "regexp match variables not allowed when using -inline", -1); + result = JIM_ERR; + goto done; + } + /* REVISIT: Ugly! */ + num_vars = 100; + } + + pmatch = Jim_Alloc((num_vars + 1) * sizeof(*pmatch)); + + /* If an offset has been specified, adjust for that now. + * If it points past the end of the string, point to the terminating null + */ + if (offset) { + if (offset > source_len) { + source_str += source_len; + } else if (offset > 0) { + source_str += offset; + } + } + + if (opt_inline) { + resultListObj = Jim_NewListObj(interp, NULL, 0); + } + + next_match: + match = regexec(regex, source_str, num_vars + 1, pmatch, 0); + if (match >= REG_BADPAT) { + char buf[100]; + regerror(match, regex, buf, sizeof(buf)); + Jim_SetResultString(interp, "", 0); + Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL); + result = JIM_ERR; + goto done; + } + + if (match == REG_NOMATCH) { + goto done; + } + + num_matches++; + + if (opt_all && !opt_inline) { + /* Just count the number of matches, so skip the substitution h*/ + goto try_next_match; + } + + /* + * If additional variable names have been specified, return + * index information in those variables. + */ + + //fprintf(stderr, "source_str=%s, [0].rm_eo=%d\n", source_str, pmatch[0].rm_eo); + + j = 0; + for (i += 2; opt_inline ? pmatch[j].rm_so != -1 : i < argc; i++, j++) { + Jim_Obj *resultObj; + + if (opt_indices) { + resultObj = Jim_NewListObj(interp, NULL, 0); + } + else { + resultObj = Jim_NewStringObj(interp, "", 0); + } + + if (pmatch[j].rm_so == -1) { + if (opt_indices) { + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1)); + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, -1)); + } + } else { + int len = pmatch[j].rm_eo - pmatch[j].rm_so; + if (opt_indices) { + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so)); + Jim_ListAppendElement(interp, resultObj, Jim_NewIntObj(interp, offset + pmatch[j].rm_so + len - 1)); + } else { + Jim_AppendString(interp, resultObj, source_str + pmatch[j].rm_so, len); + } + } + + if (opt_inline) { + Jim_ListAppendElement(interp, resultListObj, resultObj); + } + else { + /* And now set the result variable */ + result = Jim_SetVariable(interp, argv[i], resultObj); + + if (result != JIM_OK) { + Jim_SetResult(interp, Jim_NewEmptyStringObj(interp)); + Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(argv[i], NULL), "\"", NULL); + Jim_FreeObj(interp, resultObj); + break; + } + } + } + + try_next_match: + if (opt_all && pattern[0] != '^' && *source_str) { + if (pmatch[0].rm_eo) { + source_str += pmatch[0].rm_eo; + } + else { + source_str++; + } + if (*source_str) { + goto next_match; + } + } + + done: + if (result == JIM_OK) { + if (opt_inline) { + Jim_SetResult(interp, resultListObj); + } + else { + Jim_SetIntResult(interp, num_matches); + } + } + + Jim_Free(pmatch); + regfree(regex); + Jim_Free(regex); + return result; +} + +#define MAX_SUB_MATCHES 10 + +int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) +{ + int regcomp_flags = 0; + int opt_all = 0; + long offset = 0; + regex_t *regex; + const char *p; + int result = JIM_ERR; + regmatch_t pmatch[MAX_SUB_MATCHES + 1]; + int num_matches = 0; + + int i; + Jim_Obj *varname; + Jim_Obj *resultObj; + const char *source_str; + int source_len; + const char *replace_str; + const char *pattern; + + if (argc < 5) { + wrongNumArgs: + Jim_WrongNumArgs(interp, 1, argv, "?-nocase? ?-all? exp string subSpec varName"); + return JIM_ERR; + } + + for (i = 1; i < argc; i++) { + if (Jim_CompareStringImmediate(interp, argv[i], "-nocase")) { + regcomp_flags |= REG_ICASE; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-line")) { + regcomp_flags |= REG_NEWLINE; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-all")) { + opt_all = 1; + } + else if (Jim_CompareStringImmediate(interp, argv[i], "-start")) { + if (++i == argc) { + goto wrongNumArgs; + } + if (Jim_GetLong(interp, argv[i], &offset) != JIM_OK) { + return JIM_ERR; + } + } + else if (Jim_CompareStringImmediate(interp, argv[i], "--")) { + i++; + break; + } + else { + const char *opt = Jim_GetString(argv[i], NULL); + if (*opt == '-') { + /* Bad option */ + goto wrongNumArgs; + } + break; + } + } + if (argc - i != 4) { + goto wrongNumArgs; + } + + pattern = Jim_GetString(argv[i], NULL); + regex = compile_regexp(interp, pattern, regcomp_flags); + if (regex == NULL) { + return JIM_ERR; + } + + source_str = Jim_GetString(argv[i + 1], &source_len); + replace_str = Jim_GetString(argv[i + 2], NULL); + varname = argv[i + 3]; + + /* Create the result string */ + resultObj = Jim_NewStringObj(interp, "", 0); + + /* If an offset has been specified, adjust for that now. + * If it points past the end of the string, point to the terminating null + */ + if (offset) { + if (offset > source_len) { + offset = source_len; + } else if (offset < 0) { + offset = 0; + } + } + + /* Copy the part before -start */ + Jim_AppendString(interp, resultObj, source_str, offset); + + /* + * The following loop is to handle multiple matches within the + * same source string; each iteration handles one match and its + * corresponding substitution. If "-all" hasn't been specified + * then the loop body only gets executed once. + */ + + for (p = source_str + offset; *p != 0; ) { + const char *src; + int match = regexec(regex, p, MAX_SUB_MATCHES, pmatch, 0); + if (match >= REG_BADPAT) { + char buf[100]; + regerror(match, regex, buf, sizeof(buf)); + Jim_SetResultString(interp, "", 0); + Jim_AppendStrings(interp, Jim_GetResult(interp), "error while matching pattern: ", buf, NULL); + goto done; + } + if (match == REG_NOMATCH) { + break; + } + + num_matches++; + + /* + * Copy the portion of the source string before the match to the + * result variable. + */ + Jim_AppendString(interp, resultObj, p, pmatch[0].rm_so); + + /* + * Append the subSpec (replace_str) argument to the variable, making appropriate + * substitutions. This code is a bit hairy because of the backslash + * conventions and because the code saves up ranges of characters in + * subSpec to reduce the number of calls to Jim_SetVar. + */ + + for (src = replace_str; *src; src++) { + int index; + int c = *src; + + if (c == '&') { + index = 0; + } + else if (c == '\\') { + c = *++src; + if ((c >= '0') && (c <= '9')) { + index = c - '0'; + } + else if ((c == '\\') || (c == '&')) { + Jim_AppendString(interp, resultObj, src, 1); + continue; + } + else { + Jim_AppendString(interp, resultObj, src - 1, 2); + continue; + } + } + else { + Jim_AppendString(interp, resultObj, src, 1); + continue; + } + if ((index < MAX_SUB_MATCHES) && pmatch[index].rm_so != -1 && pmatch[index].rm_eo != -1) { + Jim_AppendString(interp, resultObj, p + pmatch[index].rm_so, pmatch[index].rm_eo - pmatch[index].rm_so); + } + } + + p += pmatch[0].rm_eo; + + if (!opt_all || pmatch[0].rm_eo == 0 || pattern[0] == '^') { + /* If we are doing a single match, or we haven't moved with this match + * or this is an anchored match, we stop */ + break; + } + } + + /* + * Copy the portion of the string after the last match to the + * result variable. + */ + Jim_AppendString(interp, resultObj, p, -1); + + /* And now set the result variable */ + result = Jim_SetVariable(interp, varname, resultObj); + + if (result == JIM_OK) { + Jim_SetIntResult(interp, num_matches); + } + else { + Jim_SetResult(interp, Jim_NewEmptyStringObj(interp)); + Jim_AppendStrings(interp, Jim_GetResult(interp), "couldn't set variable \"", Jim_GetString(varname, NULL), "\"", NULL); + Jim_FreeObj(interp, resultObj); + } + + done: + regfree(regex); + Jim_Free(regex); + return result; +} + +int Jim_regexpInit(Jim_Interp *interp) +{ + if (Jim_PackageProvide(interp, "regexp", "1.0", JIM_ERRMSG) != JIM_OK) { + return JIM_ERR; + } + Jim_CreateCommand(interp, "regexp", Jim_RegexpCmd, NULL, NULL); + Jim_CreateCommand(interp, "regsub", Jim_RegsubCmd, NULL, NULL); + return JIM_OK; +} |