diff options
author | Ulrich Drepper <drepper@redhat.com> | 2003-10-02 22:41:11 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2003-10-02 22:41:11 +0000 |
commit | 134abcb5b9ba854fec25051cd3c9f88e760913c5 (patch) | |
tree | 3988d22ca828c20e2fa396d7b021915a5d6f47e6 /posix | |
parent | b77ca0e82754c1b0615e5ca6acd343018ffcb287 (diff) | |
download | glibc-134abcb5b9ba854fec25051cd3c9f88e760913c5.zip glibc-134abcb5b9ba854fec25051cd3c9f88e760913c5.tar.gz glibc-134abcb5b9ba854fec25051cd3c9f88e760913c5.tar.bz2 |
Update.
2003-09-26 Paolo Bonzini <bonzini@gnu.org>
* posix/regcomp.c (parse_sub_exp): Pass RE_CARET_ANCHORS_HERE
for the first token in a subexpression as well.
2003-10-02 Jakub Jelinek <jakub@redhat.com>
* posix/regcomp.c (peek_token): Add 2003-09-20 changes for anchor
handling again.
(parse_reg_exp): Likewise.
* posix/regex.h (RE_CARET_ANCHORS_HERE): Define.
* posix/bug-regex11.c (tests): Add new tests.
* posix/bug-regex12.c (tests): Add new test.
Diffstat (limited to 'posix')
-rw-r--r-- | posix/bug-regex11.c | 14 | ||||
-rw-r--r-- | posix/bug-regex12.c | 6 | ||||
-rw-r--r-- | posix/regcomp.c | 13 | ||||
-rw-r--r-- | posix/regex.h | 5 |
4 files changed, 27 insertions, 11 deletions
diff --git a/posix/bug-regex11.c b/posix/bug-regex11.c index 681888c..40fd7c2 100644 --- a/posix/bug-regex11.c +++ b/posix/bug-regex11.c @@ -1,5 +1,5 @@ /* Regular expression tests. - Copyright (C) 2002 Free Software Foundation, Inc. + Copyright (C) 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. @@ -41,7 +41,17 @@ struct { { 0, 21 }, { 8, 9 }, { 9, 10 } } }, { "^\\(a*\\)\\1\\{9\\}\\(a\\{0,9\\}\\)\\([0-9]*;.*[^a]\\2\\([0-9]\\)\\)", "a1;;0a1aa2aaa3aaaa4aaaaa5aaaaaa6aaaaaaa7aaaaaaaa8aaaaaaaaa9aa2aa1a0", 0, - 5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } } + 5, { { 0, 67 }, { 0, 0 }, { 0, 1 }, { 1, 67 }, { 66, 67 } } }, + /* Test for BRE expression anchoring. POSIX says just that this may match; + in glibc regex it always matched, so avoid changing it. */ + { "\\(^\\|foo\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } }, + { "\\(foo\\|^\\)bar", "bar", 0, 2, { { 0, 3 }, { -1, -1 } } }, + /* In ERE this must be treated as an anchor. */ + { "(^|foo)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } }, + { "(foo|^)bar", "bar", REG_EXTENDED, 2, { { 0, 3 }, { -1, -1 } } }, + /* Here ^ cannot be treated as an anchor according to POSIX. */ + { "(^|foo)bar", "(^|foo)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, + { "(foo|^)bar", "(foo|^)bar", 0, 2, { { 0, 10 }, { -1, -1 } } }, }; int diff --git a/posix/bug-regex12.c b/posix/bug-regex12.c index d5b5ef1..0ad063e 100644 --- a/posix/bug-regex12.c +++ b/posix/bug-regex12.c @@ -1,5 +1,5 @@ /* Regular expression tests. - Copyright (C) 2002 Free Software Foundation, Inc. + Copyright (C) 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Jakub Jelinek <jakub@redhat.com>, 2002. 
@@ -32,7 +32,9 @@ struct int flags, nmatch; } tests[] = { { "^<\\([^~]*\\)\\([^~]\\)[^~]*~\\1\\(.\\).*|=.*\\3.*\\2", - "<,.8~2,~so-|=-~.0,123456789<><", REG_NOSUB, 0, } + "<,.8~2,~so-|=-~.0,123456789<><", REG_NOSUB, 0 }, + /* In ERE, all carets must be treated as anchors. */ + { "a^b", "a^b", REG_EXTENDED, 0 } }; int diff --git a/posix/regcomp.c b/posix/regcomp.c index e2f01fc..35a3b83 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -119,7 +119,7 @@ static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, + const unsigned char *class_name, const unsigned char *extra, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); @@ -1660,12 +1660,11 @@ peek_token (token, input, syntax) token->type = OP_PERIOD; break; case '^': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && + if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && re_string_cur_idx (input) != 0) { char prev = re_string_peek_byte (input, -1); - if (prev != '|' && prev != '(' && - (!(syntax & RE_NEWLINE_ALT) || prev != '\n')) + if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') break; } token->type = ANCHOR; @@ -1800,7 +1799,7 @@ parse (regexp, preg, syntax, err) bin_tree_t *tree, *eor, *root; re_token_t current_token; int new_idx; - current_token = fetch_token (regexp, syntax); + current_token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE); tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err); if (BE (*err != REG_NOERROR && tree == NULL, 0)) return NULL; @@ -1847,7 +1846,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err) { re_token_t alt_token = *token; new_idx = re_dfa_add_node (dfa, alt_token, 0); - *token = fetch_token (regexp, syntax); + *token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE); if (token->type != OP_ALT && token->type != END_OF_RE && (nest == 0 || 
token->type != OP_CLOSE_SUBEXP)) { @@ -2178,7 +2177,7 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err) return NULL; } dfa->nodes[new_idx].opr.idx = cur_nsub; - *token = fetch_token (regexp, syntax); + *token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE); /* The subexpression may be a null string. */ if (token->type == OP_CLOSE_SUBEXP) diff --git a/posix/regex.h b/posix/regex.h index e251962..1ab3e24 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -170,6 +170,11 @@ typedef unsigned long int reg_syntax_t; If not set, then case is significant. */ #define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) +/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only + for ^, because it is difficult to scan the regex backwards to find + whether ^ should be special. */ +#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) + /* This global variable defines the particular regexp syntax to use (for some interfaces). When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect |