diff options
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | posix/regcomp.c | 37 | ||||
-rw-r--r-- | posix/regex_internal.h | 2 |
3 files changed, 42 insertions, 7 deletions
@@ -1,5 +1,15 @@ 2003-09-20 Paolo Bonzini <bonzini@gnu.org> + * posix/regcomp.c (build_word_op): Rename like... + (build_charclass_op): ...this. Accept two extra parameters, + CLASS_NAME and EXTRA. Add EXTRA to the result, not only _. + (peek_token): accept \s and \S as OP_SPACE and OP_NOTSPACE. + (parse_expression): replace build_word_op with + build_charclass_op, add new arguments, accept OP_SPACE + and OP_NOTSPACE. + * posix/regex_internal.h (re_token_type_t): Add OP_SPACE + and OP_NOTSPACE. + * posix/regcomp.c (peek_token): Don't look back for ( or | to check whether to treat a caret as special. It fails for the (extended) regex \(^. diff --git a/posix/regcomp.c b/posix/regcomp.c index 4682ca6..3d54f99 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -118,8 +118,10 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, const unsigned char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ -static bin_tree_t *build_word_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - int not, reg_errcode_t *err); +static bin_tree_t *build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, + const unsigned char *class_name, + const unsigned char *extra, int not, + reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, re_token_type_t type, int index); @@ -1561,6 +1563,14 @@ peek_token (token, input, syntax) if (!(syntax & RE_NO_GNU_OPS)) token->type = OP_NOTWORD; break; + case 's': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_SPACE; + break; + case 'S': + if (!(syntax & RE_NO_GNU_OPS)) + token->type = OP_NOTSPACE; + break; case '`': if (!(syntax & RE_NO_GNU_OPS)) { @@ -2076,12 +2086,22 @@ parse_expression (regexp, preg, token, syntax, nest, err) dfa->has_mb_node = 1; break; case OP_WORD: - tree = build_word_op (dfa, regexp->trans, 0, err); + tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; case OP_NOTWORD: - tree = build_word_op (dfa, regexp->trans, 1, err); + tree = build_charclass_op (dfa, regexp->trans, "alnum", "_", 1, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_SPACE: + tree = build_charclass_op (dfa, regexp->trans, "space", "", 0, err); + if (BE (*err != REG_NOERROR && tree == NULL, 0)) + return NULL; + break; + case OP_NOTSPACE: + tree = build_charclass_op (dfa, regexp->trans, "space", "", 1, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; break; @@ -3284,9 +3304,11 @@ build_charclass (trans, sbcset, class_name, syntax) } static bin_tree_t * -build_word_op (dfa, trans, not, err) +build_charclass_op (dfa, trans, class_name, extra, not, err) re_dfa_t *dfa; RE_TRANSLATE_TYPE trans; + const unsigned char *class_name; + const unsigned char *extra; int not; reg_errcode_t *err; { @@ -3340,7 +3362,7 @@ build_word_op (dfa, trans, not, err) #ifdef RE_ENABLE_I18N mbcset, &alloc, #endif /* RE_ENABLE_I18N */ - (const unsigned char *) "alnum", 0); + class_name, 0); if (BE (ret != REG_NOERROR, 0)) { @@ -3352,7 +3374,8 @@ build_word_op (dfa, trans, not, err) return NULL; } /* \w match '_' also. */ - bitset_set (sbcset, '_'); + for (; *extra; extra++) + bitset_set (sbcset, *extra); /* If it is non-matching list. */ #ifdef RE_ENABLE_I18N diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 9dd3fe5..b2fe766 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -182,6 +182,8 @@ typedef enum OP_CLOSE_CHAR_CLASS, OP_WORD, OP_NOTWORD, + OP_SPACE, + OP_NOTSPACE, BACK_SLASH, /* Tree type, these are used only by tree. */ |