aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog10
-rw-r--r--posix/regcomp.c37
-rw-r--r--posix/regex_internal.h2
3 files changed, 42 insertions, 7 deletions
diff --git a/ChangeLog b/ChangeLog
index 0a6301c..9fc0a65 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,15 @@
2003-09-20 Paolo Bonzini <bonzini@gnu.org>
+ * posix/regcomp.c (build_word_op): Rename like...
+ (build_charclass_op): ...this. Accept two extra parameters,
+ CLASS_NAME and EXTRA. Add EXTRA to the result, not only _.
+ (peek_token): Accept \s and \S as OP_SPACE and OP_NOTSPACE.
+ (parse_expression): Replace build_word_op with
+ build_charclass_op, add new arguments, accept OP_SPACE
+ and OP_NOTSPACE.
+ * posix/regex_internal.h (re_token_type_t): Add OP_SPACE
+ and OP_NOTSPACE.
+
* posix/regcomp.c (peek_token): Don't look back for ( or |
to check whether to treat a caret as special. It fails
for the (extended) regex \(^.
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 4682ca6..3d54f99 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -118,8 +118,10 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
const unsigned char *class_name,
reg_syntax_t syntax);
#endif /* not RE_ENABLE_I18N */
-static bin_tree_t *build_word_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
- int not, reg_errcode_t *err);
+static bin_tree_t *build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
+ const unsigned char *class_name,
+ const unsigned char *extra, int not,
+ reg_errcode_t *err);
static void free_bin_tree (bin_tree_t *tree);
static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right,
re_token_type_t type, int index);
@@ -1561,6 +1563,14 @@ peek_token (token, input, syntax)
if (!(syntax & RE_NO_GNU_OPS))
token->type = OP_NOTWORD;
break;
+ case 's':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_SPACE;
+ break;
+ case 'S':
+ if (!(syntax & RE_NO_GNU_OPS))
+ token->type = OP_NOTSPACE;
+ break;
case '`':
if (!(syntax & RE_NO_GNU_OPS))
{
@@ -2076,12 +2086,22 @@ parse_expression (regexp, preg, token, syntax, nest, err)
dfa->has_mb_node = 1;
break;
case OP_WORD:
- tree = build_word_op (dfa, regexp->trans, 0, err);
+ tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 0, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
break;
case OP_NOTWORD:
- tree = build_word_op (dfa, regexp->trans, 1, err);
+ tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 1, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_SPACE:
+ tree = build_charclass_op (dfa, regexp->trans, "space", "", 0, err);
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
+ return NULL;
+ break;
+ case OP_NOTSPACE:
+ tree = build_charclass_op (dfa, regexp->trans, "space", "", 1, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
break;
@@ -3284,9 +3304,11 @@ build_charclass (trans, sbcset, class_name, syntax)
}
static bin_tree_t *
-build_word_op (dfa, trans, not, err)
+build_charclass_op (dfa, trans, class_name, extra, not, err)
re_dfa_t *dfa;
RE_TRANSLATE_TYPE trans;
+ const unsigned char *class_name;
+ const unsigned char *extra;
int not;
reg_errcode_t *err;
{
@@ -3340,7 +3362,7 @@ build_word_op (dfa, trans, not, err)
#ifdef RE_ENABLE_I18N
mbcset, &alloc,
#endif /* RE_ENABLE_I18N */
- (const unsigned char *) "alnum", 0);
+ class_name, 0);
if (BE (ret != REG_NOERROR, 0))
{
@@ -3352,7 +3374,8 @@ build_word_op (dfa, trans, not, err)
return NULL;
}
/* \w match '_' also. */
- bitset_set (sbcset, '_');
+ for (; *extra; extra++)
+ bitset_set (sbcset, *extra);
/* If it is non-matching list. */
#ifdef RE_ENABLE_I18N
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 9dd3fe5..b2fe766 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -182,6 +182,8 @@ typedef enum
OP_CLOSE_CHAR_CLASS,
OP_WORD,
OP_NOTWORD,
+ OP_SPACE,
+ OP_NOTSPACE,
BACK_SLASH,
/* Tree type, these are used only by tree. */