diff options
-rw-r--r-- | jim_tcl.txt | 9 | ||||
-rw-r--r-- | jimregexp.c | 23 | ||||
-rw-r--r-- | tests/regmin.test | 8 |
3 files changed, 35 insertions, 5 deletions
diff --git a/jim_tcl.txt b/jim_tcl.txt index 389522a..b3100b8 100644 --- a/jim_tcl.txt +++ b/jim_tcl.txt @@ -3,7 +3,7 @@ Jim Tcl(n) NAME ---- -Jim Tcl v0.72 - overview of the Jim tool command language facilities +Jim Tcl v0.73 - overview of the Jim tool command language facilities SYNOPSIS -------- @@ -55,6 +55,10 @@ The major differences with Tcl 8.5/8.6 are: RECENT CHANGES -------------- +Changes between 0.72 and 0.73 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +1. Built-in regexp now supports non-capturing parentheses: (?:...) + Changes between 0.71 and 0.72 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1. procs now allow 'args' and optional parameters in any position @@ -994,7 +998,8 @@ and POSIX are highlighted below. 4. Character classes apply to ASCII characters only 5. Supported constraint escapes: +{backslash}m+ = +{backslash}<+ = start of word, +{backslash}M+ = +{backslash}>+ = end of word 6. Backslash escapes may be used within regular expressions, such as +{backslash}n+ = newline, +{backslash}uNNNN+ = unicode -7. No support for the +?+ non-greedy quantifier. e.g. +*?+ +7. Support for the +?+ non-greedy quantifier. e.g. +*?+ +8. Support for non-capturing parentheses +(?:...)+ COMMAND RESULTS --------------- diff --git a/jimregexp.c b/jimregexp.c index c652ad4..45f7c31 100644 --- a/jimregexp.c +++ b/jimregexp.c @@ -77,7 +77,7 @@ * to the thing following the set of BRANCHes.) The opcodes are: */ -/* This *MUST* be less than (255-20)/2=117 */ +/* This *MUST* be less than (255-20-1)/2=117 */ #define REG_MAX_PAREN 100 /* definition number opnd? meaning */ @@ -98,10 +98,12 @@ #define WORDA 15 /* no Match "" at wordchar, where prev is nonword */ #define WORDZ 16 /* no Match "" at nonwordchar, where prev is word */ +#define OPENNC 19 /* no Non-capturing parentheses - must be OPEN-1 */ #define OPEN 20 /* no Mark this point in input as start of #n. */ /* OPEN+1 is number 1, etc. */ -#define CLOSE (OPEN+REG_MAX_PAREN) /* no Analogous to OPEN. 
*/ +#define CLOSE (OPEN+REG_MAX_PAREN+1) /* no Analogous to OPEN. */ #define CLOSE_END (CLOSE+REG_MAX_PAREN) +#define CLOSENC (CLOSE-1) /* no Non-capturing parentheses - must be CLOSE-1 */ /* * The first byte of the regexp internal "program" is actually this magic @@ -333,7 +335,14 @@ static int reg(regex_t *preg, int paren /* Parenthesized? */, int *flagp ) /* Make an OPEN node, if parenthesized. */ if (paren) { - parno = ++preg->re_nsub; + if (preg->regparse[0] == '?' && preg->regparse[1] == ':') { + /* non-capturing paren */ + preg->regparse += 2; + parno = -1; + } + else { + parno = ++preg->re_nsub; + } ret = regnode(preg, OPEN+parno); } else ret = 0; @@ -1446,6 +1455,14 @@ static int regmatch(regex_t *preg, int prog) case END: return(1); /* Success! */ break; + + case OPENNC: + case CLOSENC: + if (regmatch(preg, next)) { + return 1; + } + return 0; + default: if (OP(preg, scan) >= OPEN+1 && OP(preg, scan) < CLOSE_END) { const char *save; diff --git a/tests/regmin.test b/tests/regmin.test index 15a1932..ed4f1cd 100644 --- a/tests/regmin.test +++ b/tests/regmin.test @@ -44,4 +44,12 @@ test regexpmin-2.2 {utf8 min repeat} utf8 { regexp -inline {a\u00df+?} a\udf\udf\udf\udf\ub5z } "a\udf" +test regexpmin-3.1 {non-capturing paren} { + regexp -inline {x(?:a|b)?} xababcabc +} {xa} + +test regexpmin-3.2 {non-capturing paren} { + regexp -inline {x(?:a|b)?.*(b|c)} xababcabc +} {xababcabc c} + testreport |