diff options
author | Steve Bennett <steveb@workware.net.au> | 2011-06-25 08:12:40 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2011-06-25 08:12:40 +1000 |
commit | 04215dbf0b2f002bba05c91a1d4690aae0d671b7 (patch) | |
tree | 4d82096d793bee7c1595647476c93f051afb4599 | |
parent | 2d77c4a33c2a8571b2d417eeba5cdb580a910f51 (diff) | |
download | jimtcl-04215dbf0b2f002bba05c91a1d4690aae0d671b7.zip jimtcl-04215dbf0b2f002bba05c91a1d4690aae0d671b7.tar.gz jimtcl-04215dbf0b2f002bba05c91a1d4690aae0d671b7.tar.bz2 |
Update autosetup. Fixes a problem in make-c-ext.tcl
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rwxr-xr-x | autosetup/autosetup | 2 | ||||
-rwxr-xr-x | autosetup/find-tclsh | 6 | ||||
-rw-r--r-- | autosetup/jimsh0.c | 1180 |
3 files changed, 695 insertions, 493 deletions
diff --git a/autosetup/autosetup b/autosetup/autosetup index 76ca036..68aa6f8 100755 --- a/autosetup/autosetup +++ b/autosetup/autosetup @@ -657,7 +657,7 @@ proc dputs {msg} { proc user-error {msg} { show-notices puts stderr "Error: $msg" - puts stderr "Try: autosetup --help for options" + puts stderr "Try: '[file tail $::autosetup(exe)] --help' for options" exit 1 } diff --git a/autosetup/find-tclsh b/autosetup/find-tclsh index ef056a2..5c01fcc 100755 --- a/autosetup/find-tclsh +++ b/autosetup/find-tclsh @@ -1,11 +1,11 @@ #!/bin/sh # Looks for a suitable tclsh or jimsh in the PATH # If not found, builds a bootstrap jimsh from source -PATH="$PATH:$(pwd)" d=$(dirname "$0") +PATH="$PATH:$d" for tclsh in jimsh tclsh8.5 tclsh8.6 jimsh0; do $tclsh "$d/test-tclsh" 2>/dev/null && exit 0 done echo 1>&2 "No installed jimsh or tclsh, building local bootstrap jimsh0" -${CC_FOR_BUILD:-cc} -o jimsh0 "$d/jimsh0.c" || exit 1 -$(pwd)/jimsh0 "$d/test-tclsh" +${CC_FOR_BUILD:-cc} -o "$d/jimsh0" "$d/jimsh0.c" || exit 1 +"$d/jimsh0" "$d/test-tclsh" diff --git a/autosetup/jimsh0.c b/autosetup/jimsh0.c index 6cc216c..49b0ed8 100644 --- a/autosetup/jimsh0.c +++ b/autosetup/jimsh0.c @@ -19,6 +19,13 @@ #define jim_ext_array #define jim_ext_stdlib #define jim_ext_tclcompat +#ifdef __MINGW32__ +#define MKDIR_ONE_ARG +#define HAVE_SYSTEM +#else +#define HAVE_VFORK +#define HAVE_WAITPID +#endif #ifndef UTF8_UTIL_H #define UTF8_UTIL_H /** @@ -1574,6 +1581,22 @@ int Jim_tclcompatInit(Jim_Interp *interp) " }\n" " tailcall $chan read {*}${-nonewline}\n" " }\n" +"\n" +" proc fconfigure {f args} {\n" +" foreach {n v} $args {\n" +" switch -glob -- $n {\n" +" -bl* {\n" +" $f ndelay $v\n" +" }\n" +" -bu* {\n" +" $f buffering $v\n" +" }\n" +" default {\n" +" return -code error \"fconfigure: unknown option $n\"\n" +" }\n" +" }\n" +" }\n" +" }\n" "}\n" "\n" "\n" @@ -2133,12 +2156,15 @@ static int aio_cmd_read(Jim_Interp *interp, int argc, Jim_Obj *const *argv) break; } /* Check for error conditions */ - if 
(ferror(af->fp) && errno != EAGAIN) { - /* I/O error */ - Jim_FreeNewObj(interp, objPtr); - JimAioSetError(interp, af->filename); + if (ferror(af->fp)) { clearerr(af->fp); - return JIM_ERR; + /* eof and EAGAIN are not error conditions */ + if (!feof(af->fp) && errno != EAGAIN) { + /* I/O error */ + Jim_FreeNewObj(interp, objPtr); + JimAioSetError(interp, af->filename); + return JIM_ERR; + } } if (nonewline) { int len; @@ -2386,7 +2412,9 @@ static int aio_cmd_accept(Jim_Interp *interp, int argc, Jim_Obj *const *argv) /* Create the file command */ af = Jim_Alloc(sizeof(*af)); af->fd = sock; +#ifdef FD_CLOEXEC fcntl(af->fd, F_SETFD, FD_CLOEXEC); +#endif af->filename = Jim_NewStringObj(interp, "accept", -1); Jim_IncrRefCount(af->filename); af->fp = fdopen(sock, "r+"); @@ -2502,6 +2530,41 @@ static int aio_cmd_ndelay(Jim_Interp *interp, int argc, Jim_Obj *const *argv) } #endif +static int aio_cmd_buffering(Jim_Interp *interp, int argc, Jim_Obj *const *argv) +{ + AioFile *af = Jim_CmdPrivData(interp); + + static const char *options[] = { + "none", + "line", + "full", + NULL + }; + enum + { + OPT_NONE, + OPT_LINE, + OPT_FULL, + }; + int option; + + if (Jim_GetEnum(interp, argv[0], options, &option, NULL, JIM_ERRMSG) != JIM_OK) { + return JIM_ERR; + } + switch (option) { + case OPT_NONE: + setvbuf(af->fp, NULL, _IONBF, 0); + break; + case OPT_LINE: + setvbuf(af->fp, NULL, _IOLBF, BUFSIZ); + break; + case OPT_FULL: + setvbuf(af->fp, NULL, _IOFBF, BUFSIZ); + break; + } + return JIM_OK; +} + #ifdef jim_ext_eventloop static void JimAioFileEventFinalizer(Jim_Interp *interp, void *clientData) { @@ -2661,6 +2724,13 @@ static const jim_subcmd_type aio_command_table[] = { .description = "Set O_NDELAY (if arg). Returns current/new setting." 
}, #endif + { .cmd = "buffering", + .args = "none|line|full", + .function = aio_cmd_buffering, + .minargs = 1, + .maxargs = 1, + .description = "Sets buffering" + }, #ifdef jim_ext_eventloop { .cmd = "readable", .args = "?readable-script?", @@ -2748,9 +2818,11 @@ static int JimAioOpenCommand(Jim_Interp *interp, int argc, af = Jim_Alloc(sizeof(*af)); af->fp = fp; af->fd = fileno(fp); +#ifdef FD_CLOEXEC if ((OpenFlags & AIO_KEEPOPEN) == 0) { fcntl(af->fd, F_SETFD, FD_CLOEXEC); } +#endif #ifdef O_NDELAY af->flags = fcntl(af->fd, F_GETFL); #endif @@ -4665,12 +4737,22 @@ int Jim_fileInit(Jim_Interp *interp) */ #include <string.h> -#include <unistd.h> #include <signal.h> + + +#if defined(HAVE_VFORK) && defined(HAVE_WAITPID) + + +#include <unistd.h> #include <fcntl.h> #include <errno.h> #include <sys/wait.h> +#if defined(__GNUC__) && !defined(__clang__) +#define IGNORE_RC(EXPR) ((EXPR) < 0 ? -1 : 0) +#else +#define IGNORE_RC(EXPR) EXPR +#endif /* These two could be moved into the Tcl core */ static void Jim_SetResultErrno(Jim_Interp *interp, const char *msg) @@ -5536,7 +5618,7 @@ badargs: execvp(execName, &arg_array[firstArg]); /* we really can ignore the error here! */ - write(2, execerr, execerrlen) < 0 ? -1 : 0; + IGNORE_RC(write(2, execerr, execerrlen)); _exit(127); } @@ -5688,6 +5770,66 @@ int Jim_execInit(Jim_Interp *interp) Jim_CreateCommand(interp, "exec", Jim_ExecCmd, JimAllocWaitInfoTable(), JimFreeWaitInfoTable); return JIM_OK; } +#else +/* e.g. Windows. Poor mans implementation of exec with system() + * The system() call *may* do command line redirection, etc. + * The standard output is not available. + * Can't redirect filehandles. 
+ */ +static int Jim_ExecCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv) +{ + Jim_Obj *cmdlineObj = Jim_NewEmptyStringObj(interp); + int i, j; + int rc; + + /* Create a quoted command line */ + for (i = 1; i < argc; i++) { + int len; + const char *arg = Jim_GetString(argv[i], &len); + + if (i > 1) { + Jim_AppendString(interp, cmdlineObj, " ", 1); + } + if (strpbrk(arg, "\\\" ") == NULL) { + /* No quoting required */ + Jim_AppendString(interp, cmdlineObj, arg, len); + continue; + } + + Jim_AppendString(interp, cmdlineObj, "\"", 1); + for (j = 0; j < len; j++) { + if (arg[j] == '\\' || arg[j] == '"') { + Jim_AppendString(interp, cmdlineObj, "\\", 1); + } + Jim_AppendString(interp, cmdlineObj, &arg[j], 1); + } + Jim_AppendString(interp, cmdlineObj, "\"", 1); + } + rc = system(Jim_String(cmdlineObj)); + + Jim_FreeNewObj(interp, cmdlineObj); + + if (rc) { + Jim_Obj *errorCode = Jim_NewListObj(interp, NULL, 0); + Jim_ListAppendElement(interp, errorCode, Jim_NewStringObj(interp, "CHILDSTATUS", -1)); + Jim_ListAppendElement(interp, errorCode, Jim_NewIntObj(interp, 0)); + Jim_ListAppendElement(interp, errorCode, Jim_NewIntObj(interp, rc)); + Jim_SetGlobalVariableStr(interp, "errorCode", errorCode); + return JIM_ERR; + } + + return JIM_OK; +} + +int Jim_execInit(Jim_Interp *interp) +{ + if (Jim_PackageProvide(interp, "exec", "1.0", JIM_ERRMSG)) + return JIM_ERR; + + Jim_CreateCommand(interp, "exec", Jim_ExecCmd, NULL, NULL); + return JIM_OK; +} +#endif /* * tcl_clock.c @@ -5713,7 +5855,6 @@ static int clock_cmd_format(Jim_Interp *interp, int argc, Jim_Obj *const *argv) char buf[100]; time_t t; long seconds; - struct tm tm; const char *format = "%a %b %d %H:%M:%S %Z %Y"; @@ -5730,7 +5871,7 @@ static int clock_cmd_format(Jim_Interp *interp, int argc, Jim_Obj *const *argv) } t = seconds; - strftime(buf, sizeof(buf), format, localtime_r(&t, &tm)); + strftime(buf, sizeof(buf), format, localtime(&t)); Jim_SetResultString(interp, buf, -1); @@ -6277,6 +6418,8 @@ static const 
Jim_HashTableType JimVariablesHashTableType; /* Fast access to the int (wide) value of an object which is known to be of int type */ #define JimWideValue(objPtr) (objPtr)->internalRep.wideValue +#define JimObjTypeName(O) (objPtr->typePtr ? objPtr->typePtr->name : "none") + static int utf8_tounicode_case(const char *s, int *uc, int upper) { int l = utf8_tounicode(s, uc); @@ -7273,20 +7416,18 @@ struct JimParserCtx char missing; /* At end of parse, ' ' if complete, '{' if braces incomplete, '"' if quotes incomplete */ }; -#define JimParserEof(c) ((c)->eof) -#define JimParserTstart(c) ((c)->tstart) -#define JimParserTend(c) ((c)->tend) -#define JimParserTtype(c) ((c)->tt) -#define JimParserTline(c) ((c)->tline) - static int JimParseScript(struct JimParserCtx *pc); static int JimParseSep(struct JimParserCtx *pc); static int JimParseEol(struct JimParserCtx *pc); static int JimParseCmd(struct JimParserCtx *pc); +static int JimParseQuote(struct JimParserCtx *pc); static int JimParseVar(struct JimParserCtx *pc); static int JimParseBrace(struct JimParserCtx *pc); static int JimParseStr(struct JimParserCtx *pc); static int JimParseComment(struct JimParserCtx *pc); +static void JimParseSubCmd(struct JimParserCtx *pc); +static int JimParseSubQuote(struct JimParserCtx *pc); +static void JimParseSubCmd(struct JimParserCtx *pc); static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc); /* Initialize a parser context. @@ -7414,59 +7555,217 @@ static int JimParseEol(struct JimParserCtx *pc) return JIM_OK; } -static int JimParseCmd(struct JimParserCtx *pc) +/* +** Here are the rules for parsing: +** {braced expression} +** - Count open and closing braces +** - Backslash escapes meaning of braces +** +** "quoted expression" +** - First double quote at start of word terminates the expression +** - Backslash escapes quote and bracket +** - [commands brackets] are counted/nested +** - command rules apply within [brackets], not quoting rules (i.e. 
quotes have their own rules) +** +** [command expression] +** - Count open and closing brackets +** - Backslash escapes quote, bracket and brace +** - [commands brackets] are counted/nested +** - "quoted expressions" are parsed according to quoting rules +** - {braced expressions} are parsed according to brace rules +** +** For everything, backslash escapes the next char, newline increments current line +*/ + +/** + * Parses a braced expression starting at pc->p. + * + * Positions the parser at the end of the braced expression, + * sets pc->tend and possibly pc->missing. + */ +static void JimParseSubBrace(struct JimParserCtx *pc) { int level = 1; - int quoted = 0; - pc->tstart = ++pc->p; + /* Skip the brace */ + pc->p++; pc->len--; - pc->tline = pc->linenr; while (pc->len) { - if (*pc->p == '\\' && pc->len > 1) { - if (pc->p[1] == '\n') - pc->linenr++; + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + } + break; - pc->p += 2; - pc->len -= 2; - continue; - } - else if (*pc->p == '"') { - quoted = !quoted; - } - else if (!quoted) { - if (*pc->p == '[') { + case '{': level++; - } - else if (*pc->p == ']') { - level--; - if (!level) - break; - } - else if (*pc->p == '{') { - /* Save and restore tstart and tline across JimParseBrace() */ - const char * tstart = pc->tstart; - int tline = pc->tline; + break; - JimParseBrace(pc); + case '}': + if (--level == 0) { + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return; + } + break; - pc->tstart = tstart; - pc->tline = tline; - continue; - } + case '\n': + pc->linenr++; + break; } - if (*pc->p == '\n') { - pc->linenr++; + pc->p++; + pc->len--; + } + pc->missing = '{'; + pc->tend = pc->p - 1; +} + +/** + * Parses a quoted expression starting at pc->p. + * + * Positions the parser at the end of the quoted expression, + * sets pc->tend and possibly pc->missing. 
+ * + * Returns the type of the token of the string, + * either JIM_TT_ESC (if it contains values which need to be [subst]ed) + * or JIM_TT_STR. + */ +static int JimParseSubQuote(struct JimParserCtx *pc) +{ + int tt = JIM_TT_STR; + + /* Skip the quote */ + pc->p++; + pc->len--; + while (pc->len) { + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + tt = JIM_TT_ESC; + } + break; + + case '"': + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return tt; + + case '[': + JimParseSubCmd(pc); + tt = JIM_TT_ESC; + continue; + + case '\n': + pc->linenr++; + break; + + case '$': + tt = JIM_TT_ESC; + break; } pc->p++; pc->len--; } + pc->missing = '"'; pc->tend = pc->p - 1; - pc->tt = JIM_TT_CMD; - if (*pc->p == ']') { + return tt; +} + +/** + * Parses a [command] expression starting at pc->p. + * + * Positions the parser at the end of the command expression, + * sets pc->tend and possibly pc->missing. + */ +static void JimParseSubCmd(struct JimParserCtx *pc) +{ + int level = 1; + int startofword = 1; + + /* Skip the bracket */ + pc->p++; + pc->len--; + while (pc->len) { + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + } + break; + + case '[': + level++; + break; + + case ']': + if (--level == 0) { + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return; + } + break; + + case '"': + if (startofword) { + JimParseSubQuote(pc); + continue; + } + break; + + case '{': + JimParseSubBrace(pc); + startofword = 0; + continue; + + case '\n': + pc->linenr++; + break; + } + startofword = isspace(UCHAR(*pc->p)); pc->p++; pc->len--; } + pc->missing = '['; + pc->tend = pc->p - 1; +} + +static int JimParseBrace(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + pc->tt = JIM_TT_STR; + JimParseSubBrace(pc); + return JIM_OK; +} + +static int JimParseCmd(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + 
pc->tt = JIM_TT_CMD; + JimParseSubCmd(pc); + return JIM_OK; +} + +static int JimParseQuote(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + pc->tt = JimParseSubQuote(pc); return JIM_OK; } @@ -7561,48 +7860,6 @@ static int JimParseVar(struct JimParserCtx *pc) return JIM_OK; } -static int JimParseBrace(struct JimParserCtx *pc) -{ - int level = 1; - - pc->tstart = ++pc->p; - pc->len--; - pc->tline = pc->linenr; - while (1) { - if (*pc->p == '\\' && pc->len >= 2) { - pc->p++; - pc->len--; - if (*pc->p == '\n') - pc->linenr++; - } - else if (*pc->p == '{') { - level++; - } - else if (pc->len == 0 || *pc->p == '}') { - if (pc->len == 0) { - pc->missing = '{'; - /*printf("Missing brace at line %d, opened on line %d\n", pc->linenr, pc->tline);*/ - } - level--; - if (pc->len == 0 || level == 0) { - pc->tend = pc->p - 1; - if (pc->len != 0) { - pc->p++; - pc->len--; - } - pc->tt = JIM_TT_STR; - return JIM_OK; - } - } - else if (*pc->p == '\n') { - pc->linenr++; - } - pc->p++; - pc->len--; - } - return JIM_OK; /* unreached */ -} - static int JimParseStr(struct JimParserCtx *pc) { int newword = (pc->tt == JIM_TT_SEP || pc->tt == JIM_TT_EOL || @@ -7899,8 +8156,8 @@ static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc char *token; int len; - start = JimParserTstart(pc); - end = JimParserTend(pc); + start = pc->tstart; + end = pc->tend; if (start > end) { len = 0; token = Jim_Alloc(1); @@ -7909,7 +8166,7 @@ static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc else { len = (end - start) + 1; token = Jim_Alloc(len + 1); - if (JimParserTtype(pc) != JIM_TT_ESC) { + if (pc->tt != JIM_TT_ESC) { /* No escape conversion needed? Just copy it. 
*/ memcpy(token, start, len); token[len] = '\0'; @@ -7939,7 +8196,7 @@ int Jim_ScriptIsComplete(const char *s, int len, char *stateCharPtr) struct JimParserCtx parser; JimParserInit(&parser, s, len, 1); - while (!JimParserEof(&parser)) { + while (!parser.eof) { JimParseScript(&parser); } if (stateCharPtr) { @@ -7957,13 +8214,6 @@ static int JimParseListQuote(struct JimParserCtx *pc); static int JimParseList(struct JimParserCtx *pc) { - if (pc->len == 0) { - pc->tstart = pc->tend = pc->p; - pc->tline = pc->linenr; - pc->tt = JIM_TT_EOL; - pc->eof = 1; - return JIM_OK; - } switch (*pc->p) { case ' ': case '\n': @@ -7978,8 +8228,16 @@ static int JimParseList(struct JimParserCtx *pc) return JimParseBrace(pc); default: - return JimParseListStr(pc); + if (pc->len) { + return JimParseListStr(pc); + } + break; } + + pc->tstart = pc->tend = pc->p; + pc->tline = pc->linenr; + pc->tt = JIM_TT_EOL; + pc->eof = 1; return JIM_OK; } @@ -8010,10 +8268,6 @@ static int JimParseListQuote(struct JimParserCtx *pc) while (pc->len) { switch (*pc->p) { - case '$': - case '[': - pc->tt = JIM_TT_ESC; - break; case '\\': pc->tt = JIM_TT_ESC; if (--pc->len == 0) { @@ -8048,17 +8302,13 @@ static int JimParseListStr(struct JimParserCtx *pc) while (pc->len) { switch (*pc->p) { - case '$': - case '[': - pc->tt = JIM_TT_ESC; - break; case '\\': - pc->tt = JIM_TT_ESC; if (--pc->len == 0) { /* Trailing backslash */ pc->tend = pc->p; return JIM_OK; } + pc->tt = JIM_TT_ESC; pc->p++; break; case ' ': @@ -8187,17 +8437,17 @@ Jim_Obj *Jim_DuplicateObj(Jim_Interp *interp, Jim_Obj *objPtr) else { Jim_InitStringRep(dupPtr, objPtr->bytes, objPtr->length); } + + /* By default, the new object has the same type as the old object */ + dupPtr->typePtr = objPtr->typePtr; if (objPtr->typePtr != NULL) { if (objPtr->typePtr->dupIntRepProc == NULL) { dupPtr->internalRep = objPtr->internalRep; } else { + /* The dup proc may set a different type, e.g. 
NULL */ objPtr->typePtr->dupIntRepProc(interp, objPtr, dupPtr); } - dupPtr->typePtr = objPtr->typePtr; - } - else { - dupPtr->typePtr = NULL; } return dupPtr; } @@ -8964,9 +9214,14 @@ static Jim_Obj *JimNewScriptLineObj(Jim_Interp *interp, int argc, int line) { Jim_Obj *objPtr; - objPtr = Jim_NewObj(interp); +#ifdef DEBUG_SHOW_SCRIPT + char buf[100]; + snprintf(buf, sizeof(buf), "line=%d, argc=%d", line, argc); + objPtr = Jim_NewStringObj(interp, buf, -1); +#else + objPtr = Jim_NewEmptyStringObj(interp); +#endif objPtr->typePtr = &scriptLineObjType; - objPtr->bytes = JimEmptyStringRep; objPtr->internalRep.scriptLineValue.argc = argc; objPtr->internalRep.scriptLineValue.line = line; @@ -9307,8 +9562,11 @@ static void ScriptObjAddTokens(Jim_Interp *interp, struct ScriptObj *script, } } + if (lineargs == 0) { + /* First real token on the line, so record the line number */ + linenr = tokenlist->list[i].line; + } lineargs++; - linenr = tokenlist->list[i].line; /* Add each non-separator word token to the line */ while (wordtokens--) { @@ -9335,7 +9593,7 @@ static void ScriptObjAddTokens(Jim_Interp *interp, struct ScriptObj *script, assert(script->len < count); #ifdef DEBUG_SHOW_SCRIPT - printf("==== Script ====\n"); + printf("==== Script (%s) ====\n", script->fileName); for (i = 0; i < script->len; i++) { const ScriptToken *t = &script->token[i]; printf("[%2d] %s %s\n", i, jim_tt_name(t->type), Jim_String(t->objPtr)); @@ -9393,7 +9651,7 @@ int SetScriptFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) ScriptTokenListInit(&tokenlist); JimParserInit(&parser, scriptText, scriptTextLen, script->line); - while (!JimParserEof(&parser)) { + while (!parser.eof) { JimParseScript(&parser); ScriptAddToken(&tokenlist, parser.tstart, parser.tend - parser.tstart + 1, parser.tt, parser.tline); @@ -11803,14 +12061,14 @@ int SetListFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) /* Convert into a list */ JimParserInit(&parser, str, strLen, linenr); - while (!JimParserEof(&parser)) 
{ + while (!parser.eof) { Jim_Obj *elementPtr; JimParseList(&parser); - if (JimParserTtype(&parser) != JIM_TT_STR && JimParserTtype(&parser) != JIM_TT_ESC) + if (parser.tt != JIM_TT_STR && parser.tt != JIM_TT_ESC) continue; elementPtr = JimParserGetTokenObj(interp, &parser); - JimSetSourceInfo(interp, elementPtr, filename, JimParserTline(&parser)); + JimSetSourceInfo(interp, elementPtr, filename, parser.tline); ListAppendElement(objPtr, elementPtr); } if (filename) { @@ -13234,19 +13492,20 @@ static int JimExprOpIntBin(Jim_Interp *interp, struct JimExprState *e) } } break; - case JIM_EXPROP_ROTL:{ + case JIM_EXPROP_ROTL: + case JIM_EXPROP_ROTR:{ /* uint32_t would be better. But not everyone has inttypes.h? */ unsigned long uA = (unsigned long)wA; + unsigned long uB = (unsigned long)wB; const unsigned int S = sizeof(unsigned long) * 8; - wC = (unsigned long)((uA << wB) | (uA >> (S - wB))); - break; - } - case JIM_EXPROP_ROTR:{ - unsigned long uA = (unsigned long)wA; - const unsigned int S = sizeof(unsigned long) * 8; + /* Shift left by the word size or more is undefined. */ + uB %= S; - wC = (unsigned long)((uA >> wB) | (uA << (S - wB))); + if (e->opcode == JIM_EXPROP_ROTR) { + uB = S - uB; + } + wC = (unsigned long)(uA << uB) | (uA >> (S - uB)); break; } default: @@ -13777,7 +14036,6 @@ static int JimParseExpression(struct JimParserCtx *pc) break; case '[': return JimParseCmd(pc); - break; case '$': if (JimParseVar(pc) == JIM_ERR) return JimParseExprOperator(pc); @@ -13801,13 +14059,11 @@ static int JimParseExpression(struct JimParserCtx *pc) case '9': case '.': return JimParseExprNumber(pc); - break; case '"': + return JimParseQuote(pc); case '{': - /* Here it's possible to reuse the List String parsing. */ - pc->tt = JIM_TT_NONE; /* Make sure it's sensed as a new word. 
*/ - return JimParseList(pc); - break; + return JimParseBrace(pc); + case 'N': case 'I': case 'n': @@ -14485,7 +14741,7 @@ int SetExprFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) ScriptTokenListInit(&tokenlist); JimParserInit(&parser, exprText, exprTextLen, 0); - while (!JimParserEof(&parser)) { + while (!parser.eof) { if (JimParseExpression(&parser) != JIM_OK) { ScriptTokenListFree(&tokenlist); invalidexpr: @@ -15360,20 +15616,24 @@ Jim_Obj *Jim_ScanString(Jim_Interp *interp, Jim_Obj *strObjPtr, Jim_Obj *fmtObjP /* ----------------------------------------------------------------------------- * Pseudo Random Number Generation * ---------------------------------------------------------------------------*/ -static void JimPrngSeed(Jim_Interp *interp, const unsigned char *seed, int seedLen); +static void JimPrngSeed(Jim_Interp *interp, unsigned char *seed, int seedLen); /* Initialize the sbox with the numbers from 0 to 255 */ static void JimPrngInit(Jim_Interp *interp) { +#define PRNG_SEED_SIZE 256 int i; - /* XXX: Move off stack */ - unsigned int seed[256]; - unsigned rseed; /* uninitialized! 
*/ + unsigned int *seed; + time_t t = time(NULL); interp->prngState = Jim_Alloc(sizeof(Jim_PrngState)); - for (i = 0; i < 256; i++) - seed[i] = (rand_r(&rseed) ^ time(NULL) ^ clock()); - JimPrngSeed(interp, (unsigned char *)seed, sizeof(int) * 256); + + seed = Jim_Alloc(PRNG_SEED_SIZE * sizeof(*seed)); + for (i = 0; i < PRNG_SEED_SIZE; i++) { + seed[i] = (rand() ^ t ^ clock()); + } + JimPrngSeed(interp, (unsigned char *)seed, PRNG_SEED_SIZE * sizeof(*seed)); + Jim_Free(seed); } /* Generates N bytes of random data */ @@ -15400,11 +15660,9 @@ static void JimRandomBytes(Jim_Interp *interp, void *dest, unsigned int len) } /* Re-seed the generator with user-provided bytes */ -static void JimPrngSeed(Jim_Interp *interp, const unsigned char *seed, int seedLen) +static void JimPrngSeed(Jim_Interp *interp, unsigned char *seed, int seedLen) { int i; - /* XXX: Move off stack */ - unsigned char buf[256]; Jim_PrngState *prng; /* initialization, only needed the first time */ @@ -15424,8 +15682,13 @@ static void JimPrngSeed(Jim_Interp *interp, const unsigned char *seed, int seedL prng->sbox[seed[i]] = t; } prng->i = prng->j = 0; - /* discard the first 256 bytes of stream. */ - JimRandomBytes(interp, buf, 256); + + /* discard at least the first 256 bytes of stream. 
+ * borrow the seed buffer for this + */ + for (i = 0; i < 256; i += seedLen) { + JimRandomBytes(interp, seed, seedLen); + } } /* [incr] */ @@ -16520,7 +16783,7 @@ static int SetSubstFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr, int flags JimParserInit(&parser, scriptText, scriptTextLen, 1); while (1) { JimParseSubst(&parser, flags); - if (JimParserEof(&parser)) { + if (parser.eof) { /* Note that subst doesn't need the EOL token */ break; } @@ -19640,15 +19903,19 @@ static int Jim_InfoCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *arg } case INFO_COMPLETE: - if (argc != 3) { - Jim_WrongNumArgs(interp, 2, argv, "script"); + if (argc != 3 && argc != 4) { + Jim_WrongNumArgs(interp, 2, argv, "script ?missing?"); return JIM_ERR; } else { int len; const char *s = Jim_GetString(argv[2], &len); + char missing = '\0'; - Jim_SetResultBool(interp, Jim_ScriptIsComplete(s, len, NULL)); + Jim_SetResultBool(interp, Jim_ScriptIsComplete(s, len, &missing)); + if (missing && argc == 4) { + Jim_SetVariable(interp, argv[3], Jim_NewStringObj(interp, &missing, 1)); + } } break; @@ -20060,17 +20327,17 @@ static int Jim_EnvCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *argv const char *val; if (argc == 1) { - char **environ = Jim_GetEnviron(); + char **e = Jim_GetEnviron(); int i; Jim_Obj *listObjPtr = Jim_NewListObj(interp, NULL, 0); - for (i = 0; environ[i]; i++) { - const char *equals = strchr(environ[i], '='); + for (i = 0; e[i]; i++) { + const char *equals = strchr(e[i], '='); if (equals) { - Jim_ListAppendElement(interp, listObjPtr, Jim_NewStringObj(interp, environ[i], - equals - environ[i])); + Jim_ListAppendElement(interp, listObjPtr, Jim_NewStringObj(interp, e[i], + equals - e[i])); Jim_ListAppendElement(interp, listObjPtr, Jim_NewStringObj(interp, equals + 1, -1)); } } @@ -21635,12 +21902,13 @@ Jim_Obj *Jim_FormatString(Jim_Interp *interp, Jim_Obj *fmtObjPtr, int objc, Jim_ #define BACK 7 /* no Match "", "next" ptr points backward. 
*/ #define EXACTLY 8 /* str Match this string. */ #define NOTHING 9 /* no Match empty string. */ -#define STAR 10 /* node Match this (simple) thing 0 or more times. */ -#define STARMIN 11 /* node Match this (simple) thing 0 or more times, mininal match. */ -#define PLUS 12 /* node Match this (simple) thing 1 or more times. */ -#define PLUSMIN 13 /* node Match this (simple) thing 1 or more times, mininal match. */ -#define WORDA 14 /* no Match "" at wordchar, where prev is nonword */ -#define WORDZ 15 /* no Match "" at nonwordchar, where prev is word */ +#define REP 10 /* max,min Match this (simple) thing [min,max] times. */ +#define REPMIN 11 /* max,min Match this (simple) thing [min,max] times, mininal match. */ +#define REPX 12 /* max,min Match this (complex) thing [min,max] times. */ +#define REPXMIN 13 /* max,min Match this (complex) thing [min,max] times, minimal match. */ + +#define WORDA 15 /* no Match "" at wordchar, where prev is nonword */ +#define WORDZ 16 /* no Match "" at nonwordchar, where prev is word */ #define OPEN 20 /* no Mark this point in input as start of #n. */ /* OPEN+1 is number 1, etc. */ #define CLOSE (OPEN+REG_MAX_PAREN) /* no Analogous to OPEN. */ @@ -21709,6 +21977,8 @@ Jim_Obj *Jim_FormatString(Jim_Interp *interp, Jim_Obj *fmtObjPtr, int objc, Jim_ #define SPSTART 04 /* Starts with * or +. */ #define WORST 0 /* Worst case. */ +#define MAX_REP_COUNT 1000000 + /* * Forward declarations for regcomp()'s friends. 
*/ @@ -21719,11 +21989,11 @@ static int *regatom(regex_t *preg, int *flagp ); static int *regnode(regex_t *preg, int op ); static const int *regnext(regex_t *preg, const int *p ); static void regc(regex_t *preg, int b ); -static int *reginsert(regex_t *preg, int op, int *opnd ); +static int *reginsert(regex_t *preg, int op, int size, int *opnd ); static void regtail(regex_t *preg, int *p, const int *val ); static void regoptail(regex_t *preg, int *p, const int *val ); -static int reg_range_find(const int *string, int c, int nocase); +static int reg_range_find(const int *string, int c); static const char *str_find(const char *string, int c, int nocase); static int prefix_cmp(const int *prog, int proglen, const char *string, int nocase); @@ -21771,6 +22041,9 @@ int regcomp(regex_t *preg, const char *exp, int cflags) unsigned len; int flags; +#ifdef DEBUG + fprintf(stderr, "Compiling: '%s'\n", exp); +#endif memset(preg, 0, sizeof(*preg)); if (exp == NULL) @@ -21814,8 +22087,9 @@ int regcomp(regex_t *preg, const char *exp, int cflags) scan = OPERAND(scan); /* Starting-point info. */ - if (OP(scan) == EXACTLY) + if (OP(scan) == EXACTLY) { preg->regstart = *OPERAND(scan); + } else if (OP(scan) == BOL) preg->reganch++; @@ -21960,31 +22234,6 @@ static int *regbranch(regex_t *preg, int *flagp ) return(ret); } -/** - * Duplicates the program at 'pos' of length 'len' at the end of the program. - * - * If 'maketail' is set, the next point for 'pos' is set to skip to the next - * part of the program after 'pos'. - */ -static int *regdup(regex_t *preg, int *pos, int len, int maketail) -{ - int i; - - preg->regsize += len; - - if (preg->regcode == &regdummy) { - return pos; - } - - for (i = 0; i < len; i++) { - regc(preg, pos[i]); - } - if (maketail) { - regtail(preg, pos, pos + len); - } - return preg->regcode - len; -} - /* - regpiece - something followed by possible [*+?] 
* @@ -22002,6 +22251,8 @@ static int *regpiece(regex_t *preg, int *flagp) int flags; int size = preg->regsize; int *chain = NULL; + int min; + int max; ret = regatom(preg, &flags); if (ret == NULL) @@ -22019,23 +22270,15 @@ static int *regpiece(regex_t *preg, int *flagp) preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY; return NULL; } - *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); /* Handle braces (counted repetition) by expansion */ if (op == '{') { - int min = 0; - int max = 0; char *end; min = strtoul(preg->regparse + 1, &end, 10); if (end == preg->regparse + 1) { - if (*end == ',') { - min = 0; - } - else { - preg->err = REG_ERR_BAD_COUNT; - return NULL; - } + preg->err = REG_ERR_BAD_COUNT; + return NULL; } if (*end == '}') { max = min; @@ -22049,7 +22292,7 @@ static int *regpiece(regex_t *preg, int *flagp) } } if (end == preg->regparse + 1) { - max = -1; + max = MAX_REP_COUNT; } else if (max < min || max >= 100) { preg->err = REG_ERR_BAD_COUNT; @@ -22061,169 +22304,31 @@ static int *regpiece(regex_t *preg, int *flagp) } preg->regparse = strchr(preg->regparse, '}'); - - /* By default, chain to the start of the sequence */ - chain = ret; - - if (max < 0 || max == min) { - /* Simple case */ - if (max == min) { - if (min == 0) { - /* {0,0} so do nothing at all */ - reginsert(preg, NOTHING, ret); - preg->regparse++; - return ret; - } - /* Output 'min - 1' instances of 'x' */ - min--; - op = 0; - } - else { - /* {n,} is just xxxx* */ - op = '*'; - /* No - chain to the tail of the sequence */ - chain = NULL; - } - - /* We need to duplicate the arg 'min' times */ - while (min--) { - ret = regdup(preg, ret, size, 1); - } - } - else { - /* Complex case */ - int i; - - /* Chaining is needed */ - - /* Need to emit some min args first */ - for (i = 0; i < min; i++) { - ret = regdup(preg, ret, size, 1); - } - - for (i = min; i < max; i++) { - /* Emit x */ - /* There is already one instance of 'reg' at the end */ - /* Add another 'reg' at the end */ - int *prog; - - 
/* Convert to (x|), just like ? */ - prog = reginsert(preg, BRANCH, ret); /* Either x */ - regtail(preg, ret, regnode(preg, BRANCH)); /* or */ - next = regnode(preg, NOTHING); /* null. */ - regtail(preg, ret, next); - regoptail(preg, ret, next); - - /* Now grab a copy ready for the next iteration */ - if (i != max - 1) { - ret = regdup(preg, prog, size, 0); - } - } - op = 0; - } } - - if (op == '*' && (flags&SIMPLE)) { - if (preg->regparse[1] == '?') { - preg->regparse++; - reginsert(preg, STARMIN, ret); - } - else { - reginsert(preg, STAR, ret); - } + else { + min = (op == '+'); + max = (op == '?' ? 1 : MAX_REP_COUNT); } - else if (op == '*') { - if (preg->regparse[1] == '?') { - int *last; - int *branch; - preg->regparse++; + if (preg->regparse[1] == '?') { + preg->regparse++; + next = reginsert(preg, flags & SIMPLE ? REPMIN : REPXMIN, 5, ret); + } + else { + next = reginsert(preg, flags & SIMPLE ? REP: REPX, 5, ret); + } + ret[2] = max; + ret[3] = min; + ret[4] = 0; - /* Emit x*? as (|x&), where & means "self". */ - /* x points to BRANCH */ + *flagp = (min) ? (WORST|HASWIDTH) : (WORST|SPSTART); - /* Note that we need to insert BRANCH NOTHING BRANCH in front. - * Carefully keep track of where everything is inserted. - */ - chain = ret; - next = ret = reginsert(preg, BRANCH, ret); - branch = ret = reginsert(preg, NOTHING, ret); - ret = reginsert(preg, BRANCH, ret); - regtail(preg, chain, branch); - regtail(preg, ret, regnode(preg, BACK)); - regtail(preg, ret, chain); - last = regnode(preg, NOTHING); - regtail(preg, chain, last); - regtail(preg, next, last); - } - else { - /* Emit x* as (x&|), where & means "self". */ - reginsert(preg, BRANCH, ret); /* Either x */ - regoptail(preg, ret, regnode(preg, BACK)); /* and loop */ - regoptail(preg, ret, ret); /* back */ - regtail(preg, ret, regnode(preg, BRANCH)); /* or */ - regtail(preg, ret, regnode(preg, NOTHING)); /* null. 
*/ - } - } else if (op == '+' && (flags&SIMPLE)) { - if (preg->regparse[1] == '?') { - preg->regparse++; - reginsert(preg, PLUSMIN, ret); - } - else { - reginsert(preg, PLUS, ret); - } - } - else if (op == '+') { - if (preg->regparse[1] == '?') { - int *last; - preg->regparse++; - - /* Emit x+? as x(|&), where & means "self". */ - /* x points to BRANCH */ - regtail(preg, ret, regnode(preg, BRANCH)); - next = regnode(preg, NOTHING); - regtail(preg, ret, regnode(preg, BRANCH)); - regtail(preg, regnode(preg, BACK), ret); - /* Dummy node that both paths can point to */ - last = regnode(preg, NOTHING); - regtail(preg, next, last); - regtail(preg, ret, last); - } - else { - /* Emit x+ as x(&|), where & means "self". */ - next = regnode(preg, BRANCH); /* Either */ - regtail(preg, ret, next); - regtail(preg, regnode(preg, BACK), ret); /* loop back */ - regtail(preg, next, regnode(preg, BRANCH)); /* or */ - regtail(preg, ret, regnode(preg, NOTHING)); /* null. */ - } - } else if (op == '?') { - if (preg->regparse[1] == '?') { - /* Emit x?? as (|x) */ - int *last; - int *branch; - - preg->regparse++; - - chain = ret; - next = ret = reginsert(preg, BRANCH, ret); - branch = ret = reginsert(preg, NOTHING, ret); - ret = reginsert(preg, BRANCH, ret); - regtail(preg, chain, branch); - regtail(preg, ret, chain); - last = regnode(preg, NOTHING); - regtail(preg, chain, last); - regtail(preg, next, last); - } - else { - /* Emit x? as (x|) */ - reginsert(preg, BRANCH, ret); /* Either x */ - regtail(preg, ret, regnode(preg, BRANCH)); /* or */ - next = regnode(preg, NOTHING); /* null. */ - regtail(preg, ret, next); - regoptail(preg, ret, next); - } + if (!(flags & SIMPLE)) { + int *back = regnode(preg, BACK); + regtail(preg, back, ret); + regtail(preg, next, back); } + preg->regparse++; if (ISMULT(*preg->regparse)) { preg->err = REG_ERR_NESTED_COUNT; @@ -22641,27 +22746,29 @@ static void regc(regex_t *preg, int b ) * Means relocating the operand. 
* Returns the new location of the original operand. */ -static int *reginsert(regex_t *preg, int op, int *opnd ) +static int *reginsert(regex_t *preg, int op, int size, int *opnd ) { int *src; int *dst; int *place; - preg->regsize += 2; + preg->regsize += size; if (preg->regcode == ®dummy) { return opnd; } src = preg->regcode; - preg->regcode += 2; + preg->regcode += size; dst = preg->regcode; while (src > opnd) *--dst = *--src; place = opnd; /* Op node, where operand used to be. */ *place++ = op; - *place++ = 0; + while (--size) { + *place++ = 0; + } return place; } @@ -22716,7 +22823,7 @@ static void regoptail(regex_t *preg, int *p, const int *val ) */ static int regtry(regex_t *preg, const char *string ); static int regmatch(regex_t *preg, const int *prog); -static int regrepeat(regex_t *preg, const int *p ); +static int regrepeat(regex_t *preg, const int *p, int max); /* - regexec - match a regexp against a string @@ -22724,6 +22831,7 @@ static int regrepeat(regex_t *preg, const int *p ); int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) { const char *s; + const int *scan; /* Be paranoid... */ if (preg == NULL || preg->program == NULL || string == NULL) { @@ -22736,7 +22844,8 @@ int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmat } #ifdef DEBUG - /*regdump(preg);*/ + fprintf(stderr, "regexec: %s\n", string); + regdump(preg); #endif preg->eflags = eflags; @@ -22744,6 +22853,18 @@ int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmat preg->nmatch = nmatch; preg->start = string; /* All offsets are computed from here */ + /* Must clear out the embedded repeat counts */ + for (scan = OPERAND(preg->program + 1); scan != NULL; scan = regnext(preg, scan)) { + switch (OP(scan)) { + case REP: + case REPMIN: + case REPX: + case REPXMIN: + *(int *)(scan + 4) = 0; + break; + } + } + /* If there is a "must appear" string, look for it. 
*/ if (preg->regmust != NULL) { s = string; @@ -22798,10 +22919,14 @@ nextline: } else /* We don't -- general case. */ - do { + while (1) { if (regtry(preg, s)) return REG_NOERROR; - } while (*s++ != '\0'); + if (*s == '\0') { + break; + } + s += utf8_charlen(*s); + } /* Failure. */ return REG_NOMATCH; @@ -22858,18 +22983,12 @@ static int prefix_cmp(const int *prog, int proglen, const char *string, int noca /** * Searchs for 'c' in the range 'range'. * - * If 'nocase' is set, the range is assumed to be uppercase - * and 'c' is converted to uppercase before matching. - * * Returns 1 if found, or 0 if not. */ -static int reg_range_find(const int *range, int c, int nocase) +static int reg_range_find(const int *range, int c) { - if (nocase) { - /* The "string" should already be converted to uppercase */ - c = utf8_upper(c); - } while (*range) { + /*printf("Checking %d in range [%d,%d]\n", c, range[1], (range[0] + range[1] - 1));*/ if (c >= range[1] && c <= (range[0] + range[1] - 1)) { return 1; } @@ -22920,6 +23039,117 @@ static int reg_iseol(regex_t *preg, int ch) } } +static int regmatchsimplerepeat(regex_t *preg, const int *scan, int matchmin) +{ + int nextch = '\0'; + const char *save; + int no; + int c; + + int max = scan[2]; + int min = scan[3]; + const int *next = regnext(preg, scan); + + /* + * Lookahead to avoid useless match attempts + * when we know what character comes next. + */ + if (OP(next) == EXACTLY) { + nextch = *OPERAND(next); + } + save = preg->reginput; + no = regrepeat(preg, scan + 5, max); + if (no < min) { + return 0; + } + if (matchmin) { + /* from min up to no */ + max = no; + no = min; + } + /* else from no down to min */ + while (1) { + if (matchmin) { + if (no > max) { + break; + } + } + else { + if (no < min) { + break; + } + } + preg->reginput = save + utf8_index(save, no); + reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE)); + /* If it could work, try it. 
*/ + if (reg_iseol(preg, nextch) || c == nextch) { + if (regmatch(preg, next)) { + return(1); + } + } + if (matchmin) { + /* Couldn't or didn't, add one more */ + no++; + } + else { + /* Couldn't or didn't -- back up. */ + no--; + } + } + return(0); +} + +static int regmatchrepeat(regex_t *preg, int *scan, int matchmin) +{ + const char *save; + + int max = scan[2]; + int min = scan[3]; + + save = preg->reginput; + + /* Have we reached min? */ + if (scan[4] < min) { + /* No, so get another one */ + scan[4]++; + if (regmatch(preg, scan + 5)) { + return 1; + } + scan[4]--; + return 0; + } + if (scan[4] > max) { + return 0; + } + + if (matchmin) { + /* minimal, so try other branch first */ + if (regmatch(preg, regnext(preg, scan))) { + return 1; + } + /* No, so try one more */ + scan[4]++; + if (regmatch(preg, scan + 5)) { + return 1; + } + scan[4]--; + return 0; + } + /* maximal, so try this branch again */ + save = preg->reginput; + if (scan[4] < max) { + scan[4]++; + if (regmatch(preg, scan + 5)) { + return 1; + } + scan[4]--; + } + /* At this point we are at max with no match. Back up by one and try the other branch */ + preg->reginput = save; + int ret = regmatch(preg, regnext(preg, scan)); + return ret; +} + /* - regmatch - main matching routine * @@ -22937,11 +23167,14 @@ static int regmatch(regex_t *preg, const int *prog) const int *next; /* Next node. 
*/ scan = prog; + #ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan)); #endif while (scan != NULL) { + int n; + int c; #ifdef DEBUG if (regnarrate) { //fprintf(stderr, "%s...\n", regprop(scan)); @@ -22950,6 +23183,7 @@ static int regmatch(regex_t *preg, const int *prog) } #endif next = regnext(preg, scan); + n = reg_utf8_tounicode_case(preg->reginput, &c, (preg->cflags & REG_ICASE)); switch (OP(scan)) { case BOL: @@ -22957,29 +23191,38 @@ static int regmatch(regex_t *preg, const int *prog) return(0); break; case EOL: - if (!reg_iseol(preg, *preg->reginput)) { + if (!reg_iseol(preg, c)) { return(0); } break; case WORDA: /* Must be looking at a letter, digit, or _ */ - if ((!isalnum(UCHAR(*preg->reginput))) && *preg->reginput != '_') + if ((!isalnum(UCHAR(c))) && c != '_') return(0); /* Prev must be BOL or nonword */ if (preg->reginput > preg->regbol && - (isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_')) + (isalnum(UCHAR(preg->reginput[-1])) || preg->reginput[-1] == '_')) return(0); break; case WORDZ: - /* Must be looking at non letter, digit, or _ */ - if (isalnum(UCHAR(*preg->reginput)) || *preg->reginput == '_') - return(0); - /* We don't care what the previous char was */ - break; + /* Can't match at BOL */ + if (preg->reginput > preg->regbol) { + /* Current must be EOL or nonword */ + if (reg_iseol(preg, c) || !isalnum(UCHAR(c)) || c != '_') { + c = preg->reginput[-1]; + /* Previous must be word */ + if (isalnum(UCHAR(c)) || c == '_') { + break; + } + } + } + /* No */ + return(0); + case ANY: - if (reg_iseol(preg, *preg->reginput)) + if (reg_iseol(preg, c)) return 0; - preg->reginput++; + preg->reginput += n; break; case EXACTLY: { const int *opnd; @@ -22997,18 +23240,16 @@ static int regmatch(regex_t *preg, const int *prog) } break; case ANYOF: - if (reg_iseol(preg, *preg->reginput)) - return 0; - if (reg_range_find(OPERAND(scan), *preg->reginput, preg->cflags & REG_ICASE) == 0) + if (reg_iseol(preg, c) || 
reg_range_find(OPERAND(scan), c) == 0) { return(0); - preg->reginput++; + } + preg->reginput += n; break; case ANYBUT: - if (reg_iseol(preg, *preg->reginput)) - return 0; - if (reg_range_find(OPERAND(scan), *preg->reginput, preg->cflags & REG_ICASE) != 0) + if (reg_iseol(preg, c) || reg_range_find(OPERAND(scan), c) != 0) { return(0); - preg->reginput++; + } + preg->reginput += n; break; case NOTHING: break; @@ -23033,69 +23274,14 @@ static int regmatch(regex_t *preg, const int *prog) } } break; - case STARMIN: - case PLUSMIN: { - char nextch; - const char *save; - int min; - int max; + case REP: + case REPMIN: + return regmatchsimplerepeat(preg, scan, OP(scan) == REPMIN); - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. - */ - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min = (OP(scan) == STARMIN) ? 0 : 1; - save = preg->reginput; - max = regrepeat(preg, OPERAND(scan)); - while (min < max) { - int ch; - preg->reginput = save + min; - reg_utf8_tounicode_case(preg->reginput, &ch, (preg->cflags & REG_ICASE)); - /* If it could work, try it. */ - if (reg_iseol(preg, nextch) || ch == nextch) - if (regmatch(preg, next)) - return(1); - /* Couldn't or didn't, add one more */ - min++; - } - return(0); - } - break; + case REPX: + case REPXMIN: + return regmatchrepeat(preg, (int *)scan, OP(scan) == REPXMIN); - case STAR: - case PLUS: { - char nextch; - int no; - const char *save; - int min; - - /* - * Lookahead to avoid useless match attempts - * when we know what character comes next. - */ - nextch = '\0'; - if (OP(next) == EXACTLY) - nextch = *OPERAND(next); - min = (OP(scan) == STAR) ? 0 : 1; - save = preg->reginput; - no = regrepeat(preg, OPERAND(scan)); - while (no >= min) { - int ch; - reg_utf8_tounicode_case(preg->reginput, &ch, (preg->cflags & REG_ICASE)); - /* If it could work, try it. 
*/ - if (reg_iseol(preg, nextch) || ch == nextch) - if (regmatch(preg, next)) - return(1); - /* Couldn't or didn't -- back up. */ - no--; - preg->reginput = save + no; - } - return(0); - } - break; case END: return(1); /* Success! */ break; @@ -23144,50 +23330,52 @@ static int regmatch(regex_t *preg, const int *prog) /* - regrepeat - repeatedly match something simple, report how many */ -static int regrepeat(regex_t *preg, const int *p ) +static int regrepeat(regex_t *preg, const int *p, int max) { int count = 0; const char *scan; const int *opnd; + int ch; + int n; scan = preg->reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: - while (!reg_iseol(preg, *scan)) { + /* No need to handle utf8 specially here */ + while (!reg_iseol(preg, *scan) && count < max) { count++; scan++; } break; case EXACTLY: - if (preg->cflags & REG_ICASE) { - while (1) { - int ch; - int n = reg_utf8_tounicode_case(scan, &ch, 1); - if (*opnd != ch) { - break; - } - count++; - scan += n; - } - } - else { - while (*opnd == *scan) { - count++; - scan++; + while (count < max) { + n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE); + if (*opnd != ch) { + break; } + count++; + scan += n; } break; case ANYOF: - while (!reg_iseol(preg, *scan) && reg_range_find(opnd, *scan, preg->cflags & REG_ICASE) != 0) { + while (count < max) { + n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE); + if (reg_iseol(preg, ch) || reg_range_find(opnd, ch) == 0) { + break; + } count++; - scan++; + scan += n; } break; case ANYBUT: - while (!reg_iseol(preg, *scan) && reg_range_find(opnd, *scan, preg->cflags & REG_ICASE) == 0) { + while (count < max) { + n = reg_utf8_tounicode_case(scan, &ch, preg->cflags & REG_ICASE); + if (reg_iseol(preg, ch) || reg_range_find(opnd, ch) != 0) { + break; + } count++; - scan++; + scan += n; } break; default: /* Oh dear. Called inappropriately. 
*/ @@ -23211,6 +23399,7 @@ static const int *regnext(regex_t *preg, const int *p ) return(NULL); offset = NEXT(p); + if (offset == 0) return(NULL); @@ -23245,7 +23434,19 @@ static void regdump(regex_t *preg) else printf("(%d)", (int)((s-preg->program)+(next-s))); s += 2; - if (op == ANYOF || op == ANYBUT) { + if (op == REP || op == REPMIN || op == REPX || op == REPXMIN) { + int max = s[0]; + int min = s[1]; + if (max == 65535) { + printf("{%d,*}", min); + } + else { + printf("{%d,%d}", min, max); + } + printf(" %d", s[2]); + s += 3; + } + else if (op == ANYOF || op == ANYBUT) { /* set of ranges */ while (*s) { @@ -23275,9 +23476,10 @@ static void regdump(regex_t *preg) if (op == END) { /* Header fields of interest. */ - if (preg->regstart != '\0') + if (preg->regstart) { buf[utf8_fromunicode(buf, preg->regstart)] = 0; printf("start '%s' ", buf); + } if (preg->reganch) printf("anchored "); if (preg->regmust != NULL) { @@ -23333,17 +23535,17 @@ static const char *regprop( const int *op ) case END: p = "END"; break; - case STAR: - p = "STAR"; + case REP: + p = "REP"; break; - case PLUS: - p = "PLUS"; + case REPMIN: + p = "REPMIN"; break; - case STARMIN: - p = "STARMIN"; + case REPX: + p = "REPX"; break; - case PLUSMIN: - p = "PLUSMIN"; + case REPXMIN: + p = "REPXMIN"; break; case WORDA: p = "WORDA"; |