aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog15
-rw-r--r--posix/regex_internal.h12
-rw-r--r--posix/regexec.c99
3 files changed, 65 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index b2b5a75..9903bc9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2004-12-01 Paolo Bonzini <bonzini@gnu.org>
+
+ * posix/regcomp.c (free_dfa_content, init_dfa): Remove
+ references to re_dfa_t's subexps field.
+ (parse_sub_exp, parse_expression): Do not use it. Use
+ completed_bkref_map instead.
+ (create_initial_state, peek_token): Store a backreference \N
+ with opr.idx = N-1.
+ * posix/regexec.c (proceed_next_node, check_dst_limits, get_subexp):
+ Likewise.
+ (check_subexp_limits): Remove useless condition.
+ * posix/regex_internal.h (re_subexp_t): Remove.
+ (re_dfa_t): Remove subexps and subexps_alloc field, add
+ completed_bkref_map.
+
2004-12-05 Roland McGrath <roland@frob.com>
* Makeconfig: Comment typo fix.
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 703d409..1345067 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -496,13 +496,6 @@ struct re_dfastate_t
};
typedef struct re_dfastate_t re_dfastate_t;
-typedef struct
-{
- /* start <= node < end */
- int start;
- int end;
-} re_subexp_t;
-
struct re_state_table_entry
{
int num;
@@ -607,7 +600,6 @@ struct re_fail_stack_t
struct re_dfa_t
{
- re_subexp_t *subexps;
re_token_t *nodes;
int nodes_alloc;
int nodes_len;
@@ -627,13 +619,15 @@ struct re_dfa_t
int str_tree_storage_idx;
/* number of subexpressions `re_nsub' is in regex_t. */
- int subexps_alloc;
unsigned int state_hash_mask;
int states_alloc;
int init_node;
int nbackref; /* The number of backreference in this dfa. */
+
/* Bitmap expressing which backreference is used. */
unsigned int used_bkref_map;
+ unsigned int completed_bkref_map;
+
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
a node which can accept multibyte character or multi character
diff --git a/posix/regexec.c b/posix/regexec.c
index 5877ade..22b8064 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -1260,7 +1260,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
#endif /* RE_ENABLE_I18N */
if (type == OP_BACK_REF)
{
- int subexp_idx = dfa->nodes[node].opr.idx;
+ int subexp_idx = dfa->nodes[node].opr.idx + 1;
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
if (fs != NULL)
{
@@ -1853,7 +1853,7 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
int subexp_idx;
struct re_backref_cache_entry *ent;
ent = mctx->bkref_ents + limits->elems[lim_idx];
- subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
subexp_idx, dst_node, dst_idx,
@@ -1891,49 +1891,48 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
switch (dfa->nodes[node].type)
{
case OP_BACK_REF:
- {
- struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
- do
- {
- int dst, cpos;
-
- if (ent->node != node)
- continue;
-
- if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
- && (ent->eps_reachable_subexps_map
- & (1 << (subexp_idx - 1))) == 0)
- continue;
+ if (bkref_idx != -1)
+ {
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+ do
+ {
+ int dst, cpos;
- /* Recurse trying to reach the OP_OPEN_SUBEXP and
- OP_CLOSE_SUBEXP cases below. But, if the
- destination node is the same node as the source
- node, don't recurse because it would cause an
- infinite loop: a regex that exhibits this behavior
- is ()\1*\1* */
- dst = dfa->edests[node].elems[0];
- if (dst == from_node)
- {
- if (boundaries & 1)
- return -1;
- else /* if (boundaries & 2) */
- return 0;
- }
+ if (ent->node != node)
+ continue;
- cpos = check_dst_limits_calc_pos_1 (mctx, boundaries,
- subexp_idx, dst, bkref_idx);
+ if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map)
+ && !(ent->eps_reachable_subexps_map & (1 << subexp_idx)))
+ continue;
- if (cpos == -1 /* && (boundaries & 1) */)
- return -1;
+ /* Recurse trying to reach the OP_OPEN_SUBEXP and
+ OP_CLOSE_SUBEXP cases below. But, if the
+ destination node is the same node as the source
+ node, don't recurse because it would cause an
+ infinite loop: a regex that exhibits this behavior
+ is ()\1*\1* */
+ dst = dfa->edests[node].elems[0];
+ if (dst == from_node)
+ {
+ if (boundaries & 1)
+ return -1;
+ else /* if (boundaries & 2) */
+ return 0;
+ }
- if (cpos == 0 && (boundaries & 2))
- return 0;
+ cpos =
+ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ dst, bkref_idx);
+ if (cpos == -1 /* && (boundaries & 1) */)
+ return -1;
+ if (cpos == 0 && (boundaries & 2))
+ return 0;
- ent->eps_reachable_subexps_map &= ~(1 << (subexp_idx - 1));
- }
- while (ent++->more);
- break;
- }
+ ent->eps_reachable_subexps_map &= ~(1 << subexp_idx);
+ }
+ while (ent++->more);
+ }
+ break;
case OP_OPEN_SUBEXP:
if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
@@ -2003,7 +2002,7 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
continue; /* This is unrelated limitation. */
- subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
+ subexp_idx = dfa->nodes[ent->node].opr.idx;
if (ent->subexp_to == str_idx)
{
int ops_node = -1;
@@ -2060,16 +2059,12 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
{
if (subexp_idx != dfa->nodes[node].opr.idx)
continue;
- if ((type == OP_CLOSE_SUBEXP && ent->subexp_to != str_idx)
- || (type == OP_OPEN_SUBEXP))
- {
- /* It is against this limitation.
- Remove it form the current sifted state. */
- err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
- candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
+ /* It is against this limitation.
+ Remove it form the current sifted state. */
+ err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+ candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
}
}
}
@@ -2656,7 +2651,7 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
while (entry++->more);
}
- subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
+ subexp_num = dfa->nodes[bkref_node].opr.idx;
/* For each sub expression */
for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)