From 1cef7b3caef9c520148f15511ec193dcce80d1b2 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Mon, 22 Dec 2003 06:52:35 +0000 Subject: Update. 2003-12-21 Jakub Jelinek * posix/regcomp.c (duplicate_node, duplicate_node_closure): Revert 2003-11-24 changes. * posix/regexec.c (group_nodes_into_DFAstates): For CHARACTER with NEXT_{,NOT}WORD_CONSTRAINT check word_char bit. * posix/bug-regex19.c (tests): Add new tests. * posix/regexec.c (check_dst_limits_calc_pos): Fix formatting. * posix/regcomp.c (parse_dup_op): Return NULL if dup_elem is NULL, after checking syntax. Optimize. (calc_first): Fix comment. * posix/bug-regex13.c (tests): Add new tests. --- posix/bug-regex13.c | 28 +++++++++++++++++++++++++++- posix/bug-regex19.c | 3 +++ posix/regexec.c | 44 +++++++++++++++++++++++++++----------------- 3 files changed, 57 insertions(+), 18 deletions(-) (limited to 'posix') diff --git a/posix/bug-regex13.c b/posix/bug-regex13.c index e6b0ca1..df1c95d 100644 --- a/posix/bug-regex13.c +++ b/posix/bug-regex13.c @@ -34,7 +34,33 @@ static struct } tests[] = { {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match. */ {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match. */ - {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0} + {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abbc", -1}, + /* Nested duplication. */ + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "ac", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abc", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "ac", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbc", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abbc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "ac", 0}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abc", -1}, + {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abbc", -1}, }; int diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c index f24e0aa..4000b19 100644 --- a/posix/bug-regex19.c +++ b/posix/bug-regex19.c @@ -246,6 +246,9 @@ static struct test_s {ERE, "(\\<|[A].)[A~C]", "DACC", 0, 1}, {ERE, "(\\<|[A].)[A~C]", "B!A=", 0, 2}, {ERE, "(\\<|[A].)[A~C]", "B~C", 0, 2}, + {ERE, "^[^A]*\\bB", "==B", 0, 0}, + {ERE, "^[^A]*\\bB", "CBD!=B", 0, 0}, + {ERE, "[^A]*\\bB", "==B", 2, 2} }; int diff --git a/posix/regexec.c b/posix/regexec.c index 72b26f1..7f8fac8 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1746,7 +1746,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, int limit, subexp_idx, from_node, str_idx; { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; - int node_idx; + int node_idx; /* If we are outside the range of the subexpression, return -1 or 1. */ if (str_idx < lim->subexp_from) @@ -1761,23 +1761,23 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, /* Else, we are on the boundary: examine the nodes on the epsilon closure. */ - for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) - { - int node = eclosures->elems[node_idx]; + for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) + { + int node = eclosures->elems[node_idx]; switch (dfa->nodes[node].type) { case OP_BACK_REF: - { - int bi = search_cur_bkref_entry (mctx, str_idx); - for (; bi < mctx->nbkref_ents; ++bi) - { - struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; + { + int bi = search_cur_bkref_entry (mctx, str_idx); + for (; bi < mctx->nbkref_ents; ++bi) + { + struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; int dst, cpos; /* If this backreference goes beyond the point we're examining, don't go any further. */ - if (ent->str_idx > str_idx) - break; + if (ent->str_idx > str_idx) + break; if (ent->node != node || ent->subexp_from != ent->subexp_to) continue; @@ -1788,7 +1788,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, node, don't recurse because it would cause an infinite loop: a regex that exhibits this behavior is ()\1*\1* */ - dst = dfa->edests[node].elems[0]; + dst = dfa->edests[node].elems[0]; if (dst == from_node) { if (str_idx == lim->subexp_from) @@ -1797,17 +1797,17 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, return 0; } - cpos = check_dst_limits_calc_pos (dfa, mctx, limit, - dfa->eclosures + dst, - subexp_idx, dst, - str_idx); + cpos = check_dst_limits_calc_pos (dfa, mctx, limit, + dfa->eclosures + dst, + subexp_idx, dst, + str_idx); if (cpos == -1 && str_idx == lim->subexp_from) return -1; if (cpos == 0 /* && str_idx == lim->lim->subexp_to */) return 0; - } + } break; } @@ -3416,6 +3416,11 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) if (constraint & NEXT_WORD_CONSTRAINT) { unsigned int any_set = 0; + if (type == CHARACTER && !node->word_char) + { + bitset_empty (accepts); + continue; + } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) for (j = 0; j < BITSET_UINTS; ++j) @@ -3430,6 +3435,11 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) if (constraint & NEXT_NOTWORD_CONSTRAINT) { unsigned int any_set = 0; + if (type == CHARACTER && node->word_char) + { + bitset_empty (accepts); + continue; + } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) for (j = 0; j < BITSET_UINTS; ++j) -- cgit v1.1