From 1eb1f3ed707923e0c95107d5c8bea9b37aeb2b8d Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Fri, 3 Jun 2011 18:48:10 +1000 Subject: Fix parsing bug introduced by 505ce9d7 This caused a problem with quotes inside commands. Rework the parsing to properly handle commands inside quoted strings and quoted strings inside commands. Signed-off-by: Steve Bennett --- jim.c | 336 ++++++++++++++++++++++++++++++++++++------------------- tests/parse.test | 19 ++-- 2 files changed, 232 insertions(+), 123 deletions(-) diff --git a/jim.c b/jim.c index 48cc62e..8ddc7a4 100644 --- a/jim.c +++ b/jim.c @@ -1095,20 +1095,18 @@ struct JimParserCtx char missing; /* At end of parse, ' ' if complete, '{' if braces incomplete, '"' if quotes incomplete */ }; -#define JimParserEof(c) ((c)->eof) -#define JimParserTstart(c) ((c)->tstart) -#define JimParserTend(c) ((c)->tend) -#define JimParserTtype(c) ((c)->tt) -#define JimParserTline(c) ((c)->tline) - static int JimParseScript(struct JimParserCtx *pc); static int JimParseSep(struct JimParserCtx *pc); static int JimParseEol(struct JimParserCtx *pc); static int JimParseCmd(struct JimParserCtx *pc); +static int JimParseQuote(struct JimParserCtx *pc); static int JimParseVar(struct JimParserCtx *pc); static int JimParseBrace(struct JimParserCtx *pc); static int JimParseStr(struct JimParserCtx *pc); static int JimParseComment(struct JimParserCtx *pc); +static void JimParseSubCmd(struct JimParserCtx *pc); +static int JimParseSubQuote(struct JimParserCtx *pc); +static void JimParseSubCmd(struct JimParserCtx *pc); static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc); /* Initialize a parser context. @@ -1236,59 +1234,217 @@ static int JimParseEol(struct JimParserCtx *pc) return JIM_OK; } -static int JimParseCmd(struct JimParserCtx *pc) +/* +** Here are the rules for parsing: +** {braced expression} +** - Count open and closing braces +** - Backslash escapes meaning of braces +** +** "quoted expression" +** - First double quote at start of word terminates the expression +** - Backslash escapes quote and bracket +** - [commands brackets] are counted/nested +** - command rules apply within [brackets], not quoting rules (i.e. quotes have their own rules) +** +** [command expression] +** - Count open and closing brackets +** - Backslash escapes quote, bracket and brace +** - [commands brackets] are counted/nested +** - "quoted expressions" are parsed according to quoting rules +** - {braced expressions} are parsed according to brace rules +** +** For everything, backslash escapes the next char, newline increments current line +*/ + +/** + * Parses a braced expression starting at pc->p. + * + * Positions the parser at the end of the braced expression, + * sets pc->tend and possibly pc->missing. + */ +static void JimParseSubBrace(struct JimParserCtx *pc) { int level = 1; - int quoted = 0; - pc->tstart = ++pc->p; + /* Skip the brace */ + pc->p++; pc->len--; - pc->tline = pc->linenr; while (pc->len) { - if (*pc->p == '\\' && pc->len > 1) { - if (pc->p[1] == '\n') - pc->linenr++; + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + } + break; - pc->p += 2; - pc->len -= 2; - continue; - } - else if (*pc->p == '"') { - quoted = !quoted; - } - else if (!quoted) { - if (*pc->p == '[') { + case '{': level++; - } - else if (*pc->p == ']') { - level--; - if (!level) - break; - } - else if (*pc->p == '{') { - /* Save and restore tstart and tline across JimParseBrace() */ - const char * tstart = pc->tstart; - int tline = pc->tline; + break; - JimParseBrace(pc); + case '}': + if (--level == 0) { + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return; + } + break; - pc->tstart = tstart; - pc->tline = tline; - continue; - } + case '\n': + pc->linenr++; + break; } - if (*pc->p == '\n') { - pc->linenr++; + pc->p++; + pc->len--; + } + pc->missing = '{'; + pc->tend = pc->p - 1; +} + +/** + * Parses a quoted expression starting at pc->p. + * + * Positions the parser at the end of the quoted expression, + * sets pc->tend and possibly pc->missing. + * + * Returns the type of the token of the string, + * either JIM_TT_ESC (if it contains values which need to be [subst]ed) + * or JIM_TT_STR. + */ +static int JimParseSubQuote(struct JimParserCtx *pc) +{ + int tt = JIM_TT_STR; + + /* Skip the quote */ + pc->p++; + pc->len--; + while (pc->len) { + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + tt = JIM_TT_ESC; + } + break; + + case '"': + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return tt; + + case '[': + JimParseSubCmd(pc); + tt = JIM_TT_ESC; + continue; + + case '\n': + pc->linenr++; + break; + + case '$': + tt = JIM_TT_ESC; + break; } pc->p++; pc->len--; } + pc->missing = '"'; pc->tend = pc->p - 1; - pc->tt = JIM_TT_CMD; - if (*pc->p == ']') { + return tt; +} + +/** + * Parses a [command] expression starting at pc->p. + * + * Positions the parser at the end of the command expression, + * sets pc->tend and possibly pc->missing. + */ +static void JimParseSubCmd(struct JimParserCtx *pc) +{ + int level = 1; + int startofword = 1; + + /* Skip the bracket */ + pc->p++; + pc->len--; + while (pc->len) { + switch (*pc->p) { + case '\\': + if (pc->len > 1) { + if (*++pc->p == '\n') { + pc->linenr++; + } + pc->len--; + } + break; + + case '[': + level++; + break; + + case ']': + if (--level == 0) { + pc->tend = pc->p - 1; + pc->p++; + pc->len--; + return; + } + break; + + case '"': + if (startofword) { + JimParseSubQuote(pc); + continue; + } + break; + + case '{': + JimParseSubBrace(pc); + startofword = 0; + continue; + + case '\n': + pc->linenr++; + break; + } + startofword = isspace(UCHAR(*pc->p)); pc->p++; pc->len--; } + pc->missing = '['; + pc->tend = pc->p - 1; +} + +static int JimParseBrace(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + pc->tt = JIM_TT_STR; + JimParseSubBrace(pc); + return JIM_OK; +} + +static int JimParseCmd(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + pc->tt = JIM_TT_CMD; + JimParseSubCmd(pc); + return JIM_OK; +} + +static int JimParseQuote(struct JimParserCtx *pc) +{ + pc->tstart = pc->p + 1; + pc->tline = pc->linenr; + pc->tt = JimParseSubQuote(pc); return JIM_OK; } @@ -1383,48 +1539,6 @@ static int JimParseVar(struct JimParserCtx *pc) return JIM_OK; } -static int JimParseBrace(struct JimParserCtx *pc) -{ - int level = 1; - - pc->tstart = ++pc->p; - pc->len--; - pc->tline = pc->linenr; - while (1) { - if (*pc->p == '\\' && pc->len >= 2) { - pc->p++; - pc->len--; - if (*pc->p == '\n') - pc->linenr++; - } - else if (*pc->p == '{') { - level++; - } - else if (pc->len == 0 || *pc->p == '}') { - if (pc->len == 0) { - pc->missing = '{'; - /*printf("Missing brace at line %d, opened on line %d\n", pc->linenr, pc->tline);*/ - } - level--; - if (pc->len == 0 || level == 0) { - pc->tend = pc->p - 1; - if (pc->len != 0) { - pc->p++; - pc->len--; - } - pc->tt = JIM_TT_STR; - return JIM_OK; - } - } - else if (*pc->p == '\n') { - pc->linenr++; - } - pc->p++; - pc->len--; - } - return JIM_OK; /* unreached */ -} - static int JimParseStr(struct JimParserCtx *pc) { int newword = (pc->tt == JIM_TT_SEP || pc->tt == JIM_TT_EOL || @@ -1721,8 +1835,8 @@ static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc char *token; int len; - start = JimParserTstart(pc); - end = JimParserTend(pc); + start = pc->tstart; + end = pc->tend; if (start > end) { len = 0; token = Jim_Alloc(1); @@ -1731,7 +1845,7 @@ static Jim_Obj *JimParserGetTokenObj(Jim_Interp *interp, struct JimParserCtx *pc else { len = (end - start) + 1; token = Jim_Alloc(len + 1); - if (JimParserTtype(pc) != JIM_TT_ESC) { + if (pc->tt != JIM_TT_ESC) { /* No escape conversion needed? Just copy it. */ memcpy(token, start, len); token[len] = '\0'; @@ -1761,7 +1875,7 @@ int Jim_ScriptIsComplete(const char *s, int len, char *stateCharPtr) struct JimParserCtx parser; JimParserInit(&parser, s, len, 1); - while (!JimParserEof(&parser)) { + while (!parser.eof) { JimParseScript(&parser); } if (stateCharPtr) { @@ -1779,13 +1893,6 @@ static int JimParseListQuote(struct JimParserCtx *pc); static int JimParseList(struct JimParserCtx *pc) { - if (pc->len == 0) { - pc->tstart = pc->tend = pc->p; - pc->tline = pc->linenr; - pc->tt = JIM_TT_EOL; - pc->eof = 1; - return JIM_OK; - } switch (*pc->p) { case ' ': case '\n': @@ -1800,8 +1907,16 @@ static int JimParseList(struct JimParserCtx *pc) return JimParseBrace(pc); default: - return JimParseListStr(pc); + if (pc->len) { + return JimParseListStr(pc); + } + break; } + + pc->tstart = pc->tend = pc->p; + pc->tline = pc->linenr; + pc->tt = JIM_TT_EOL; + pc->eof = 1; return JIM_OK; } @@ -1832,10 +1947,6 @@ static int JimParseListQuote(struct JimParserCtx *pc) while (pc->len) { switch (*pc->p) { - case '$': - case '[': - pc->tt = JIM_TT_ESC; - break; case '\\': pc->tt = JIM_TT_ESC; if (--pc->len == 0) { @@ -1870,17 +1981,13 @@ static int JimParseListStr(struct JimParserCtx *pc) while (pc->len) { switch (*pc->p) { - case '$': - case '[': - pc->tt = JIM_TT_ESC; - break; case '\\': - pc->tt = JIM_TT_ESC; if (--pc->len == 0) { /* Trailing backslash */ pc->tend = pc->p; return JIM_OK; } + pc->tt = JIM_TT_ESC; pc->p++; break; case ' ': @@ -3194,7 +3301,7 @@ int SetScriptFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) ScriptTokenListInit(&tokenlist); JimParserInit(&parser, scriptText, scriptTextLen, script->line); - while (!JimParserEof(&parser)) { + while (!parser.eof) { JimParseScript(&parser); ScriptAddToken(&tokenlist, parser.tstart, parser.tend - parser.tstart + 1, parser.tt, parser.tline); @@ -5603,15 +5710,15 @@ int SetListFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) /* Convert into a list */ JimParserInit(&parser, str, strLen, linenr); - while (!JimParserEof(&parser)) { + while (!parser.eof) { Jim_Obj *elementPtr; JimParseList(&parser); - if (JimParserTtype(&parser) != JIM_TT_STR && JimParserTtype(&parser) != JIM_TT_ESC) + if (parser.tt != JIM_TT_STR && parser.tt != JIM_TT_ESC) continue; elementPtr = JimParserGetTokenObj(interp, &parser); if (filename) { - JimSetSourceInfo(interp, elementPtr, filename, JimParserTline(&parser)); + JimSetSourceInfo(interp, elementPtr, filename, parser.tline); } ListAppendElement(objPtr, elementPtr); } @@ -7586,7 +7693,6 @@ static int JimParseExpression(struct JimParserCtx *pc) break; case '[': return JimParseCmd(pc); - break; case '$': if (JimParseVar(pc) == JIM_ERR) return JimParseExprOperator(pc); @@ -7610,13 +7716,11 @@ static int JimParseExpression(struct JimParserCtx *pc) case '9': case '.': return JimParseExprNumber(pc); - break; case '"': + return JimParseQuote(pc); case '{': - /* Here it's possible to reuse the List String parsing. */ - pc->tt = JIM_TT_NONE; /* Make sure it's sensed as a new word. */ - return JimParseList(pc); - break; + return JimParseBrace(pc); + case 'N': case 'I': case 'n': @@ -8294,7 +8398,7 @@ int SetExprFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr) ScriptTokenListInit(&tokenlist); JimParserInit(&parser, exprText, exprTextLen, 0); - while (!JimParserEof(&parser)) { + while (!parser.eof) { if (JimParseExpression(&parser) != JIM_OK) { ScriptTokenListFree(&tokenlist); invalidexpr: @@ -10332,7 +10436,7 @@ static int SetSubstFromAny(Jim_Interp *interp, struct Jim_Obj *objPtr, int flags JimParserInit(&parser, scriptText, scriptTextLen, 1); while (1) { JimParseSubst(&parser, flags); - if (JimParserEof(&parser)) { + if (parser.eof) { /* Note that subst doesn't need the EOL token */ break; } diff --git a/tests/parse.test b/tests/parse.test index d8e6035..b28f285 100644 --- a/tests/parse.test +++ b/tests/parse.test @@ -272,38 +272,43 @@ test parse-1.53 "special chars in dict sugar" { array names a } {{x[}} -test parse-1.52 "special chars in dict sugar" { +test parse-1.54 "special chars in dict sugar" { set x $a(x\[) } 5 -test parse-1.53 "special chars in dict sugar" { +test parse-1.55 "special chars in dict sugar" { unset -nocomplain a set a(x\() 5 array names a } {x(} -test parse-1.52 "special chars in dict sugar" { +test parse-1.56 "special chars in dict sugar" { set x $a(x\() } 5 -test parse-1.53 "special chars in dict sugar" { +test parse-1.57 "special chars in dict sugar" { unset -nocomplain a set a(x() 5 array names a } {x(} -test parse-1.52 "special chars in dict sugar" { +test parse-1.58 "special chars in dict sugar" { set x $a(x() } 5 -test parse-1.53 "special chars in dict sugar" { +test parse-1.59 "special chars in dict sugar" { unset -nocomplain a set a(x") 5 lindex [array names a] 0 } {x"} -test parse-1.52 "special chars in dict sugar" { +test parse-1.60 "special chars in dict sugar" { set x $a(x") } 5 +test parse-1.61 "quote in command" { + set x [list \\" x] + lindex $x end +} x + testreport -- cgit v1.1