diff options
author | Steve Bennett <steveb@workware.net.au> | 2017-12-22 12:57:03 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2017-12-31 11:45:53 +1000 |
commit | 2d2f74ebfeeb056130a37fec19189766a85cec81 (patch) | |
tree | 9f070ca7d6641b4d7fb8e3ec6ea423d2db5062d5 | |
parent | dde3b217dacb724ea4b6f86a8f7095d73e80674f (diff) | |
download | jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.zip jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.tar.gz jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.tar.bz2 |
regexp: Implement class shorthand escapes in brackets
The following class shorthand escapes now behave the same as in Tcl
when used within bracket expressions:
\d => [[:digit:]]
\s => [[:space:]]
\w => [[:alnum:]_] (note the underscore)
e.g. [a-f\d] => [a-f0-9]
Previously these shorthand escapes were only implemented outside bracket expressions.
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | jimregexp.c | 40 | ||||
-rw-r--r-- | tests/regcount.test | 1 |
2 files changed, 30 insertions, 11 deletions
diff --git a/jimregexp.c b/jimregexp.c
index cf31558..3771bd7 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -724,8 +724,31 @@ static int regatom(regex_t *preg, int *flagp)
 				int start;
 				int end;
 
+				enum {
+					CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
+					CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
+					CC_NUM
+				};
+				int cc;
+
 				pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
 				if (start == '\\') {
+					/* First check for class shorthand escapes */
+					switch (*pattern) {
+						case 's':
+							pattern++;
+							cc = CC_SPACE;
+							goto cc_switch;
+						case 'd':
+							pattern++;
+							cc = CC_DIGIT;
+							goto cc_switch;
+						case 'w':
+							pattern++;
+							reg_addrange(preg, '_', '_');
+							cc = CC_ALNUM;
+							goto cc_switch;
+					}
 					pattern += reg_decode_escape(pattern, &start);
 					if (start == 0) {
 						preg->err = REG_ERR_NULL_CHAR;
@@ -752,23 +775,18 @@ static int regatom(regex_t *preg, int *flagp)
 						":alpha:", ":alnum:", ":space:", ":blank:", ":upper:", ":lower:",
 						":digit:", ":xdigit:", ":cntrl:", ":graph:", ":print:", ":punct:",
 					};
-					enum {
-						CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
-						CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
-						CC_NUM
-					};
-					int i;
 
-					for (i = 0; i < CC_NUM; i++) {
-						n = strlen(character_class[i]);
-						if (strncmp(pattern, character_class[i], n) == 0) {
+					for (cc = 0; cc < CC_NUM; cc++) {
+						n = strlen(character_class[cc]);
+						if (strncmp(pattern, character_class[cc], n) == 0) {
 							/* Found a character class */
 							pattern += n + 1;
 							break;
 						}
 					}
-					if (i != CC_NUM) {
-						switch (i) {
+					if (cc != CC_NUM) {
+cc_switch:
+						switch (cc) {
 							case CC_ALNUM:
 								reg_addrange(preg, '0', '9');
 								/* Fall through */
diff --git a/tests/regcount.test b/tests/regcount.test
index 96f4ddd..5c1469e 100644
--- a/tests/regcount.test
+++ b/tests/regcount.test
@@ -84,6 +84,7 @@ foreach {pat str exp} {
 	(a|y){5,6}? baaaad {}
 	{[[:alpha:]]+} _bcd56_ef bcd
 	{[[:alnum:]]+} _bcd56_ef bcd56
+	{[\w]+} :_bcd56_ef _bcd56_ef
 	{[[:space:]]+} "_bc \t\r\n\f\v_" "{ \t\r\n\f\v}"
 	{[\x41-\x43]+} "_ABCD_" ABC
 	{\m.+\M} "#A test#" "{A test}"