aboutsummaryrefslogtreecommitdiff
path: root/jimregexp.c
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2017-12-22 12:57:03 +1000
committer Steve Bennett <steveb@workware.net.au> 2017-12-31 11:45:53 +1000
commit 2d2f74ebfeeb056130a37fec19189766a85cec81 (patch)
tree 9f070ca7d6641b4d7fb8e3ec6ea423d2db5062d5 /jimregexp.c
parent dde3b217dacb724ea4b6f86a8f7095d73e80674f (diff)
download jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.zip
jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.tar.gz
jimtcl-2d2f74ebfeeb056130a37fec19189766a85cec81.tar.bz2
regexp: Implement class shorthand escapes in brackets
The following class shorthand escapes now match Tcl when used within
bracket expressions:

    \d    [[:digit:]]
    \s    [[:space:]]
    \w    [[:alnum:]_]    (note underscore)

e.g. [a-f\d] => [a-f0-9]

Previously these shorthand escapes were only implemented outside
bracket expressions.

Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jimregexp.c')
-rw-r--r-- jimregexp.c 40
1 files changed, 29 insertions, 11 deletions
diff --git a/jimregexp.c b/jimregexp.c
index cf31558..3771bd7 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -724,8 +724,31 @@ static int regatom(regex_t *preg, int *flagp)
int start;
int end;
+ enum {
+ CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
+ CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
+ CC_NUM
+ };
+ int cc;
+
pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
if (start == '\\') {
+ /* First check for class shorthand escapes */
+ switch (*pattern) {
+ case 's':
+ pattern++;
+ cc = CC_SPACE;
+ goto cc_switch;
+ case 'd':
+ pattern++;
+ cc = CC_DIGIT;
+ goto cc_switch;
+ case 'w':
+ pattern++;
+ reg_addrange(preg, '_', '_');
+ cc = CC_ALNUM;
+ goto cc_switch;
+ }
pattern += reg_decode_escape(pattern, &start);
if (start == 0) {
preg->err = REG_ERR_NULL_CHAR;
@@ -752,23 +775,18 @@ static int regatom(regex_t *preg, int *flagp)
":alpha:", ":alnum:", ":space:", ":blank:", ":upper:", ":lower:",
":digit:", ":xdigit:", ":cntrl:", ":graph:", ":print:", ":punct:",
};
- enum {
- CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
- CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
- CC_NUM
- };
- int i;
- for (i = 0; i < CC_NUM; i++) {
- n = strlen(character_class[i]);
- if (strncmp(pattern, character_class[i], n) == 0) {
+ for (cc = 0; cc < CC_NUM; cc++) {
+ n = strlen(character_class[cc]);
+ if (strncmp(pattern, character_class[cc], n) == 0) {
/* Found a character class */
pattern += n + 1;
break;
}
}
- if (i != CC_NUM) {
- switch (i) {
+ if (cc != CC_NUM) {
+cc_switch:
+ switch (cc) {
case CC_ALNUM:
reg_addrange(preg, '0', '9');
/* Fall through */