aboutsummaryrefslogtreecommitdiff
path: root/jimregexp.c
diff options
context:
space:
mode:
author: Steve Bennett <steveb@workware.net.au> 2013-11-10 11:04:14 +1000
committer: Steve Bennett <steveb@workware.net.au> 2016-02-02 12:48:14 +1000
commit: a14c13716364a835a8ab6cfcb4aa969e8c3bcc23 (patch)
tree: 355d31284ecf7427dae199a40d2c11109c3e1951 /jimregexp.c
parent: d95b24518a0668f23e414100c1239de71625f403 (diff)
download: jimtcl-a14c13716364a835a8ab6cfcb4aa969e8c3bcc23.zip
jimtcl-a14c13716364a835a8ab6cfcb4aa969e8c3bcc23.tar.gz
jimtcl-a14c13716364a835a8ab6cfcb4aa969e8c3bcc23.tar.bz2
regexp: Add missing support for character classes
[[:blank:]], [[:xdigit:]], etc.
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jimregexp.c')
-rw-r--r-- jimregexp.c 82
1 files changed, 64 insertions, 18 deletions
diff --git a/jimregexp.c b/jimregexp.c
index 20a0e83..dca55a2 100644
--- a/jimregexp.c
+++ b/jimregexp.c
@@ -743,27 +743,73 @@ static int regatom(regex_t *preg, int *flagp)
reg_addrange(preg, start, end);
continue;
}
- if (start == '[') {
- if (strncmp(pattern, ":alpha:]", 8) == 0) {
- if ((preg->cflags & REG_ICASE) == 0) {
- reg_addrange(preg, 'a', 'z');
+ if (start == '[' && pattern[0] == ':') {
+ static const char *character_class[] = {
+ ":alpha:", ":alnum:", ":space:", ":blank:", ":upper:", ":lower:",
+ ":digit:", ":xdigit:", ":cntrl:", ":graph:", ":print:", ":punct:",
+ };
+ enum {
+ CC_ALPHA, CC_ALNUM, CC_SPACE, CC_BLANK, CC_UPPER, CC_LOWER,
+ CC_DIGIT, CC_XDIGIT, CC_CNTRL, CC_GRAPH, CC_PRINT, CC_PUNCT,
+ CC_NUM
+ };
+ int i;
+
+ for (i = 0; i < CC_NUM; i++) {
+ int n = strlen(character_class[i]);
+ if (strncmp(pattern, character_class[i], n) == 0) {
+ /* Found a character class */
+ pattern += n + 1;
+ break;
}
- reg_addrange(preg, 'A', 'Z');
- pattern += 8;
- continue;
}
- if (strncmp(pattern, ":alnum:]", 8) == 0) {
- if ((preg->cflags & REG_ICASE) == 0) {
- reg_addrange(preg, 'a', 'z');
+ if (i != CC_NUM) {
+ switch (i) {
+ case CC_ALNUM:
+ reg_addrange(preg, '0', '9');
+ /* Fall through */
+ case CC_ALPHA:
+ if ((preg->cflags & REG_ICASE) == 0) {
+ reg_addrange(preg, 'a', 'z');
+ }
+ reg_addrange(preg, 'A', 'Z');
+ break;
+ case CC_SPACE:
+ reg_addrange_str(preg, " \t\r\n\f\v");
+ break;
+ case CC_BLANK:
+ reg_addrange_str(preg, " \t");
+ break;
+ case CC_UPPER:
+ reg_addrange(preg, 'A', 'Z');
+ break;
+ case CC_LOWER:
+ reg_addrange(preg, 'a', 'z');
+ break;
+ case CC_XDIGIT:
+ reg_addrange(preg, 'a', 'f');
+ reg_addrange(preg, 'A', 'F');
+ /* Fall through */
+ case CC_DIGIT:
+ reg_addrange(preg, '0', '9');
+ break;
+ case CC_CNTRL:
+ reg_addrange(preg, 0, 31);
+ reg_addrange(preg, 127, 127);
+ break;
+ case CC_PRINT:
+ reg_addrange(preg, ' ', '~');
+ break;
+ case CC_GRAPH:
+ reg_addrange(preg, '!', '~');
+ break;
+ case CC_PUNCT:
+ reg_addrange(preg, '!', '/');
+ reg_addrange(preg, ':', '@');
+ reg_addrange(preg, '[', '`');
+ reg_addrange(preg, '{', '~');
+ break;
}
- reg_addrange(preg, 'A', 'Z');
- reg_addrange(preg, '0', '9');
- pattern += 8;
- continue;
- }
- if (strncmp(pattern, ":space:]", 8) == 0) {
- reg_addrange_str(preg, " \t\r\n\f\v");
- pattern += 8;
continue;
}
}