aboutsummaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-10-12 08:34:26 +0000
committerUlrich Drepper <drepper@redhat.com>2002-10-12 08:34:26 +0000
commit485d775dd578f2c2528d10d7618f09c45ffe6840 (patch)
treedde239879b15fbec3536cede5e8019211495f84b /posix
parentcc12f2a442d1d971d6ec0c21bdfa578299601fc7 (diff)
downloadglibc-485d775dd578f2c2528d10d7618f09c45ffe6840.zip
glibc-485d775dd578f2c2528d10d7618f09c45ffe6840.tar.gz
glibc-485d775dd578f2c2528d10d7618f09c45ffe6840.tar.bz2
Update.
2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_fastmap_iter): Remove the handling OP_CONTEXT_NODE. (regfree): Likewise. (create_initial_state): Likewise. (analyze): Remove the substitutions which became useless. (calc_first): Likewise. (calc_epsdest): Use edests of OP_BACK_REF in case that it has epsilon destination. (duplicate_node_closure): New function. (duplicate_node): Remove the handling OP_CONTEXT_NODE. (calc_inveclosure): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Invoke duplicate_node_closure instead of direct invocation of duplicate_node. (parse): Don't use comma operator in the return to avoid compiler warning. (parse_reg_exp): Likewise. (parse_branch): Likewise. (parse_expression): Likewise. (parse_sub_exp): Likewise. (parse_dup_op): Likewise. * posix/regex_internal.c (re_dfa_add_node): Remove the substitutions which became useless. (create_ci_newstate): Remove the handling OP_CONTEXT_NODE. (create_cd_newstate): Likewise. * posix/regex_internal.h (re_token_type_t): Remove the obsolete type. (re_token_t): Likewise. (re_dfa_t): Likewise. (re_node_set_remove): New macro. * posix/regexec.c (check_matching): Remove the handling OP_CONTEXT_NODE. (check_halt_node_context): Likewise. (proceed_next_node): Likewise. (pop_fail_stack): Fix the memory leak. (set_regs): Likewise. (free_fail_stack_return): New function. (sift_states_backward): Fix the memory leak. Remove the handling OP_CONTEXT_NODE. (update_cur_sifted_state): Append some if clause to avoid redundant call. (sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a back reference. (check_dst_limits): Remove the handling OP_CONTEXT_NODE. (check_subexp_limits): Likewise. (search_subexp): Likewise. (sift_states_bkref): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (transit_state_bkref_loop): Likewise. (group_nodes_into_DFAstates): Likewise. (check_node_accept): Likewise. (sift_ctx_init): Add initializing. 2002-10-12 Ulrich Drepper <drepper@redhat.com> * sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use __builtin_expect.
Diffstat (limited to 'posix')
-rw-r--r--posix/regcomp.c365
-rw-r--r--posix/regex_internal.c38
-rw-r--r--posix/regex_internal.h9
-rw-r--r--posix/regexec.c236
4 files changed, 303 insertions, 345 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 7917c64..715dd0f 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -85,6 +85,9 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
+static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
+ int top_clone_node, int root_node,
+ unsigned int constraint);
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
@@ -351,11 +354,6 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
{
int node = init_state->nodes.elems[node_cnt];
re_token_type_t type = dfa->nodes[node].type;
- if (type == OP_CONTEXT_NODE)
- {
- node = dfa->nodes[node].opr.ctx_info->entity;
- type = dfa->nodes[node].type;
- }
if (type == CHARACTER)
fastmap[dfa->nodes[node].opr.c] = 1;
@@ -587,18 +585,7 @@ regfree (preg)
#endif /* RE_ENABLE_I18N */
if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
re_free (node->opr.sbcset);
- else if (node->type == OP_CONTEXT_NODE)
- {
- if (dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
- {
- if (node->opr.ctx_info->bkref_eclosure != NULL)
- re_node_set_free (node->opr.ctx_info->bkref_eclosure);
- re_free (node->opr.ctx_info->bkref_eclosure);
- }
- re_free (node->opr.ctx_info);
- }
}
- re_free (dfa->firsts);
re_free (dfa->nexts);
for (i = 0; i < dfa->nodes_len; ++i)
{
@@ -883,39 +870,25 @@ create_initial_state (dfa)
re_token_type_t type = dfa->nodes[node_idx].type;
int clexp_idx;
- int entity = (type != OP_CONTEXT_NODE ? node_idx
- : dfa->nodes[node_idx].opr.ctx_info->entity);
- if ((type != OP_CONTEXT_NODE
- || (dfa->nodes[entity].type != OP_BACK_REF))
- && (type != OP_BACK_REF))
+ if (type != OP_BACK_REF)
continue;
for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
{
re_token_t *clexp_node;
clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
if (clexp_node->type == OP_CLOSE_SUBEXP
- && clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx)
+ && clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
break;
}
if (clexp_idx == init_nodes.nelem)
continue;
- if (type == OP_CONTEXT_NODE
- && (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type
- == OP_BACK_REF))
- {
- int prev_nelem = init_nodes.nelem;
- re_node_set_merge (&init_nodes,
- dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure);
- if (prev_nelem < init_nodes.nelem)
- i = 0;
- }
- else if (type == OP_BACK_REF)
+ if (type == OP_BACK_REF)
{
- int next_idx = dfa->nexts[node_idx];
- if (!re_node_set_contains (&init_nodes, next_idx))
+ int dest_idx = dfa->edests[node_idx].elems[0];
+ if (!re_node_set_contains (&init_nodes, dest_idx))
{
- re_node_set_merge (&init_nodes, dfa->eclosures + next_idx);
+ re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
i = 0;
}
}
@@ -959,18 +932,16 @@ analyze (dfa)
reg_errcode_t ret;
/* Allocate arrays. */
- dfa->firsts = re_malloc (int, dfa->nodes_alloc);
dfa->nexts = re_malloc (int, dfa->nodes_alloc);
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
- if (BE (dfa->firsts == NULL || dfa->nexts == NULL || dfa->edests == NULL
+ if (BE (dfa->nexts == NULL || dfa->edests == NULL
|| dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
return REG_ESPACE;
/* Initialize them. */
for (i = 0; i < dfa->nodes_len; ++i)
{
- dfa->firsts[i] = -1;
dfa->nexts[i] = -1;
re_node_set_init_empty (dfa->edests + i);
re_node_set_init_empty (dfa->eclosures + i);
@@ -1083,8 +1054,6 @@ calc_first (dfa, node)
node->first = node->left->first;
break;
}
- if (node->type == 0)
- dfa->firsts[idx] = node->first;
}
/* Calculate "next" for the node NODE. */
@@ -1187,11 +1156,114 @@ calc_epsdest (dfa, node)
}
else if (dfa->nodes[idx].type == ANCHOR
|| dfa->nodes[idx].type == OP_OPEN_SUBEXP
- || dfa->nodes[idx].type == OP_CLOSE_SUBEXP)
+ || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
+ || dfa->nodes[idx].type == OP_BACK_REF)
re_node_set_init_1 (dfa->edests + idx, node->next);
}
}
+/* Duplicate the epsilon closure of the node ROOT_NODE.
+ Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
+ to their own constraint. */
+
+static reg_errcode_t
+duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
+ init_constraint)
+ re_dfa_t *dfa;
+ int top_org_node, top_clone_node, root_node;
+ unsigned int init_constraint;
+{
+ reg_errcode_t err;
+ int org_node, clone_node, ret;
+ unsigned int constraint = init_constraint;
+ for (org_node = top_org_node, clone_node = top_clone_node;;)
+ {
+ int org_dest, clone_dest;
+ if (dfa->nodes[org_node].type == OP_BACK_REF)
+ {
+ /* If the back reference epsilon-transit, its destination must
+ also have the constraint. Then duplicate the epsilon closure
+ of the destination of the back reference, and store it in
+ edests of the back reference. */
+ org_dest = dfa->nexts[org_node];
+ re_node_set_empty (dfa->edests + clone_node);
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else if (dfa->edests[org_node].nelem == 0)
+ {
+ /* In case of the node can't epsilon-transit, don't duplicate the
+ destination and store the original destination as the
+ destination of the node. */
+ dfa->nexts[clone_node] = dfa->nexts[org_node];
+ break;
+ }
+ else if (dfa->edests[org_node].nelem == 1)
+ {
+ /* In case of the node can epsilon-transit, and it has only one
+ destination. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ if (dfa->nodes[org_node].type == ANCHOR)
+ {
+ /* In case of the node has another constraint, append it. */
+ if (org_node == root_node && clone_node != org_node)
+ {
+ /* ...but if the node is root_node itself, it means the
+ epsilon closure have a loop, then tie it to the
+ destination of the root_node. */
+ ret = re_node_set_insert (dfa->edests + clone_node,
+ org_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ break;
+ }
+ constraint |= dfa->nodes[org_node].opr.ctx_type;
+ }
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ else /* dfa->edests[org_node].nelem == 2 */
+ {
+ /* In case of the node can epsilon-transit, and it has two
+ destinations. */
+ org_dest = dfa->edests[org_node].elems[0];
+ re_node_set_empty (dfa->edests + clone_node);
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+
+ err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node,
+ constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+
+ org_dest = dfa->edests[org_node].elems[1];
+ err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
+ if (BE (ret < 0, 0))
+ return REG_ESPACE;
+ }
+ org_node = org_dest;
+ clone_node = clone_dest;
+ }
+ return REG_NOERROR;
+}
+
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
otherwise return the error code. */
@@ -1204,50 +1276,18 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
{
re_token_t dup;
int dup_idx;
- reg_errcode_t err;
-
- dup.type = OP_CONTEXT_NODE;
- if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
- {
- /* If the node whose index is ORG_IDX is the same as the intended
- node, use it. */
- if (dfa->nodes[org_idx].constraint == constraint)
- {
- *new_idx = org_idx;
- return REG_NOERROR;
- }
- dup.constraint = constraint |
- dfa->nodes[org_idx].constraint;
- }
- else
- dup.constraint = constraint;
- /* In case that `entity' points OP_CONTEXT_NODE,
- we correct `entity' to real entity in calc_inveclosures(). */
- dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
+ dup = dfa->nodes[org_idx];
dup_idx = re_dfa_add_node (dfa, dup, 1);
- if (BE (dup.opr.ctx_info == NULL || dup_idx == -1, 0))
+ if (BE (dup_idx == -1, 0))
return REG_ESPACE;
- dup.opr.ctx_info->entity = org_idx;
- dup.opr.ctx_info->bkref_eclosure = NULL;
-
+ dfa->nodes[dup_idx].constraint = constraint;
+ if (dfa->nodes[org_idx].type == ANCHOR)
+ dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
dfa->nodes[dup_idx].duplicated = 1;
- dfa->firsts[dup_idx] = dfa->firsts[org_idx];
- dfa->nexts[dup_idx] = dfa->nexts[org_idx];
- err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- /* Since we don't duplicate epsilon nodes, epsilon closure have
- only itself. */
- err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
- if (BE (err != REG_NOERROR, 0))
- return err;
- /* Then we must update inveclosure for this node.
- We process them at last part of calc_eclosure(),
- since we don't complete to calculate them here. */
+ re_node_set_init_empty (dfa->edests + dup_idx);
+ re_node_set_init_empty (dfa->eclosures + dup_idx);
+ re_node_set_init_empty (dfa->inveclosures + dup_idx);
*new_idx = dup_idx;
return REG_NOERROR;
@@ -1257,7 +1297,7 @@ static void
calc_inveclosure (dfa)
re_dfa_t *dfa;
{
- int src, idx, dest, entity;
+ int src, idx, dest;
for (src = 0; src < dfa->nodes_len; ++src)
{
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
@@ -1265,15 +1305,6 @@ calc_inveclosure (dfa)
dest = dfa->eclosures[src].elems[idx];
re_node_set_insert (dfa->inveclosures + dest, src);
}
-
- entity = src;
- while (dfa->nodes[entity].type == OP_CONTEXT_NODE)
- {
- entity = dfa->nodes[entity].opr.ctx_info->entity;
- re_node_set_merge (dfa->inveclosures + src,
- dfa->inveclosures + entity);
- dfa->nodes[src].opr.ctx_info->entity = entity;
- }
}
}
@@ -1283,16 +1314,17 @@ static reg_errcode_t
calc_eclosure (dfa)
re_dfa_t *dfa;
{
- int idx, node_idx, max, incomplete = 0;
+ int node_idx, incomplete;
#ifdef DEBUG
assert (dfa->nodes_len > 0);
#endif
+ incomplete = 0;
/* For each nodes, calculate epsilon closure. */
- for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
+ for (node_idx = 0; ; ++node_idx)
{
reg_errcode_t err;
re_node_set eclosure_elem;
- if (node_idx == max)
+ if (node_idx == dfa->nodes_len)
{
if (!incomplete)
break;
@@ -1301,7 +1333,6 @@ calc_eclosure (dfa)
}
#ifdef DEBUG
- assert (dfa->nodes[node_idx].type != OP_CONTEXT_NODE);
assert (dfa->eclosures[node_idx].nelem != -1);
#endif
/* If we have already calculated, skip it. */
@@ -1318,41 +1349,6 @@ calc_eclosure (dfa)
re_node_set_free (&eclosure_elem);
}
}
-
- /* for duplicated nodes. */
- for (idx = max; idx < dfa->nodes_len; ++idx)
- {
- int entity, i, constraint;
- re_node_set *bkref_eclosure;
- entity = dfa->nodes[idx].opr.ctx_info->entity;
- re_node_set_merge (dfa->inveclosures + idx, dfa->inveclosures + entity);
- if (dfa->nodes[entity].type != OP_BACK_REF)
- continue;
-
- /* If the node is backreference, duplicate the epsilon closure of
- the next node. Since it may epsilon transit. */
- /* Note: duplicate_node() may realloc dfa->eclosures, etc. */
- bkref_eclosure = re_malloc (re_node_set, 1);
- if (BE (bkref_eclosure == NULL, 0))
- return REG_ESPACE;
- re_node_set_init_empty (bkref_eclosure);
- constraint = dfa->nodes[idx].constraint;
- for (i = 0; i < dfa->eclosures[dfa->nexts[idx]].nelem; ++i)
- {
- int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
- if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
- {
- reg_errcode_t err;
- err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
- constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
- re_node_set_insert (bkref_eclosure, dest_node_idx);
- }
- dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
- }
-
return REG_NOERROR;
}
@@ -1366,8 +1362,9 @@ calc_eclosure_iter (new_set, dfa, node, root)
{
reg_errcode_t err;
unsigned int constraint;
- int i, max, incomplete = 0;
+ int i, incomplete;
re_node_set eclosure;
+ incomplete = 0;
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@@ -1378,9 +1375,19 @@ calc_eclosure_iter (new_set, dfa, node, root)
constraint = ((dfa->nodes[node].type == ANCHOR)
? dfa->nodes[node].opr.ctx_type : 0);
+ /* If the current node has constraints, duplicate all nodes.
+ Since they must inherit the constraints. */
+ if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+ {
+ int org_node, cur_node;
+ org_node = cur_node = node;
+ err = duplicate_node_closure (dfa, node, node, node, constraint);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
/* Expand each epsilon destination nodes. */
- if (dfa->edests[node].nelem != 0)
+ if (IS_EPSILON_NODE(dfa->nodes[node].type))
for (i = 0; i < dfa->edests[node].nelem; ++i)
{
re_node_set eclosure_elem;
@@ -1413,28 +1420,6 @@ calc_eclosure_iter (new_set, dfa, node, root)
}
}
- /* If the current node has constraints, duplicate all non-epsilon nodes.
- Since they must inherit the constraints. */
- if (constraint)
- for (i = 0, max = eclosure.nelem; i < max; ++i)
- {
- int dest = eclosure.elems[i];
- if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
- {
- int dup_dest;
- reg_errcode_t err;
- err = duplicate_node (&dup_dest, dfa, dest, constraint);
- if (BE (err != REG_NOERROR, 0))
- return err;
- if (dest != dup_dest)
- {
- re_node_set_remove_at (&eclosure, i--);
- re_node_set_insert (&eclosure, dup_dest);
- --max;
- }
- }
- }
-
/* Epsilon closures include itself. */
re_node_set_insert (&eclosure, node);
if (incomplete && !root)
@@ -1793,7 +1778,10 @@ parse (regexp, preg, syntax, err)
else
root = eor;
if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
return root;
}
@@ -1841,7 +1829,10 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
branch = NULL;
tree = create_tree (tree, branch, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
dfa->has_plural_match = 1;
}
return tree;
@@ -1883,7 +1874,10 @@ parse_branch (regexp, preg, token, syntax, nest, err)
{
tree = create_tree (tree, exp, CONCAT, 0);
if (tree == NULL)
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
else if (tree == NULL)
tree = exp;
@@ -1916,7 +1910,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
{
@@ -1954,7 +1951,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
++dfa->nbackref;
dfa->has_mb_node = 1;
break;
@@ -1963,7 +1963,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
case OP_DUP_QUESTION:
case OP_OPEN_DUP_NUM:
if (syntax & RE_CONTEXT_INVALID_OPS)
- return *err = REG_BADRPT, NULL;
+ {
+ *err = REG_BADRPT;
+ return NULL;
+ }
else if (syntax & RE_CONTEXT_INDEP_OPS)
{
*token = fetch_token (regexp, syntax);
@@ -1973,7 +1976,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
case OP_CLOSE_SUBEXP:
if ((token->type == OP_CLOSE_SUBEXP) &&
!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
- return *err = REG_ERPAREN, NULL;
+ {
+ *err = REG_ERPAREN;
+ return NULL;
+ }
/* else fall through */
case OP_CLOSE_DUP_NUM:
/* We treat it as a normal character. */
@@ -1983,7 +1989,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
break;
case ANCHOR:
if (dfa->word_char == NULL)
@@ -2008,7 +2017,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
|| tree_first == NULL || tree_last == NULL
|| tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
else
{
@@ -2027,7 +2039,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
if (MB_CUR_MAX > 1)
dfa->has_mb_node = 1;
break;
@@ -2108,7 +2123,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
left_par = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || left_par == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
dfa->nodes[new_idx].opr.idx = cur_nsub;
*token = fetch_token (regexp, syntax);
@@ -2134,7 +2152,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
: create_tree (tree, right_par, CONCAT, 0));
tree = create_tree (left_par, tree, CONCAT, 0);
if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
dfa->nodes[new_idx].opr.idx = cur_nsub;
return tree;
@@ -2252,7 +2273,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
work_tree = duplicate_tree (elem, dfa);
tree = create_tree (tree, work_tree, CONCAT, 0);
if (BE (work_tree == NULL || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
}
}
@@ -2261,7 +2285,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (tree, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
- return *err = REG_ESPACE, NULL;
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
}
*token = fetch_token (regexp, syntax);
return tree;
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 92f59b4..69fb9a6 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -929,20 +929,18 @@ re_dfa_add_node (dfa, token, mode)
dfa->nodes = new_array;
if (mode)
{
- int *new_firsts, *new_nexts;
+ int *new_nexts;
re_node_set *new_edests, *new_eclosures, *new_inveclosures;
- new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc);
new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
new_eclosures = re_realloc (dfa->eclosures, re_node_set,
dfa->nodes_alloc);
new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
dfa->nodes_alloc);
- if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL
+ if (BE (new_nexts == NULL || new_edests == NULL
|| new_eclosures == NULL || new_inveclosures == NULL, 0))
return -1;
- dfa->firsts = new_firsts;
dfa->nexts = new_nexts;
dfa->edests = new_edests;
dfa->eclosures = new_eclosures;
@@ -951,6 +949,7 @@ re_dfa_add_node (dfa, token, mode)
}
dfa->nodes[dfa->nodes_len] = token;
dfa->nodes[dfa->nodes_len].duplicated = 0;
+ dfa->nodes[dfa->nodes_len].constraint = 0;
return dfa->nodes_len++;
}
@@ -1126,7 +1125,7 @@ create_ci_newstate (dfa, nodes, hash)
{
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
- if (type == CHARACTER)
+ if (type == CHARACTER && !node->constraint)
continue;
/* If the state has the halt node, the state is a halt state. */
@@ -1139,13 +1138,8 @@ create_ci_newstate (dfa, nodes, hash)
#endif /* RE_ENABLE_I18N */
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
- else if (type == ANCHOR || OP_CONTEXT_NODE)
- {
- newstate->has_constraint = 1;
- if (type == OP_CONTEXT_NODE
- && dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE)
- newstate->halt = 1;
- }
+ else if (type == ANCHOR || node->constraint)
+ newstate->has_constraint = 1;
}
err = register_state (dfa, newstate, hash);
return (err != REG_NOERROR) ? NULL : newstate;
@@ -1175,9 +1169,11 @@ create_cd_newstate (dfa, nodes, context, hash)
unsigned int constraint = 0;
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
- if (type == CHARACTER)
- continue;
+ if (node->constraint)
+ constraint = node->constraint;
+ if (type == CHARACTER && !constraint)
+ continue;
/* If the state has the halt node, the state is a halt state. */
else if (type == END_OF_RE)
newstate->halt = 1;
@@ -1190,20 +1186,6 @@ create_cd_newstate (dfa, nodes, context, hash)
newstate->has_backref = 1;
else if (type == ANCHOR)
constraint = node->opr.ctx_type;
- else if (type == OP_CONTEXT_NODE)
- {
- re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type;
- constraint = node->constraint;
- if (ctype == END_OF_RE)
- newstate->halt = 1;
- else if (ctype == OP_BACK_REF)
- newstate->has_backref = 1;
-#ifdef RE_ENABLE_I18N
- else if (ctype == COMPLEX_BRACKET
- || (type == OP_PERIOD && MB_CUR_MAX > 1))
- newstate->accept_mb = 1;
-#endif /* RE_ENABLE_I18N */
- }
if (constraint)
{
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 5aef684..7193ea0 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -133,7 +133,6 @@ typedef enum
OP_DUP_QUESTION,
OP_BACK_REF,
ANCHOR,
- OP_CONTEXT_NODE,
/* Dummy marker. */
END_OF_RE_TOKEN_T
@@ -198,11 +197,6 @@ typedef struct
#endif /* RE_ENABLE_I18N */
int idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
- struct
- {
- int entity; /* for OP_CONTEXT_NODE, index of the entity */
- re_node_set *bkref_eclosure;
- } *ctx_info;
} opr;
#if __GNUC__ >= 2
re_token_type_t type : 8;
@@ -474,7 +468,6 @@ struct re_dfa_t
int nodes_alloc;
int nodes_len;
bin_tree_t *str_tree;
- int *firsts;
int *nexts;
re_node_set *edests;
re_node_set *eclosures;
@@ -519,6 +512,8 @@ static int re_node_set_compare (const re_node_set *set1,
const re_node_set *set2);
static int re_node_set_contains (const re_node_set *set, int elem);
static void re_node_set_remove_at (re_node_set *set, int idx);
+#define re_node_set_remove(set,id) \
+ (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);
diff --git a/posix/regexec.c b/posix/regexec.c
index 7e3a832..5c80f19 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -95,6 +95,8 @@ static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
int fl_backtrack);
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
+
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const regex_t *preg,
const re_match_context_t *mctx,
@@ -845,11 +847,8 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
for (i = 0; i < cur_state->nodes.nelem; ++i)
{
- re_token_type_t type;
int node = cur_state->nodes.elems[i];
- int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node
- : dfa->nodes[node].opr.ctx_info->entity);
- type = dfa->nodes[entity].type;
+ re_token_type_t type = dfa->nodes[node].type;
if (type == OP_BACK_REF)
{
int clexp_idx;
@@ -859,7 +858,7 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
re_token_t *clexp_node;
clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
if (clexp_node->type == OP_CLOSE_SUBEXP
- && clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx)
+ && clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx)
{
err = match_ctx_add_entry (mctx, node, 0, 0, 0);
if (BE (err != REG_NOERROR, 0))
@@ -955,15 +954,13 @@ static int check_halt_node_context (dfa, node, context)
int node;
unsigned int context;
{
- int entity;
re_token_type_t type = dfa->nodes[node].type;
- if (type == END_OF_RE)
- return 1;
- if (type != OP_CONTEXT_NODE)
+ unsigned int constraint = dfa->nodes[node].constraint;
+ if (type != END_OF_RE)
return 0;
- entity = dfa->nodes[node].opr.ctx_info->entity;
- if (dfa->nodes[entity].type != END_OF_RE
- || NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context))
+ if (!constraint)
+ return 1;
+ if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
return 0;
return 1;
}
@@ -1008,39 +1005,27 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
struct re_fail_stack_t *fs;
{
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
- int i, err, dest_node, cur_entity;
+ int i, err, dest_node;
dest_node = -1;
- cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE)
- ? dfa->nodes[node].opr.ctx_info->entity : node);
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
- int ndest, dest_nodes[2], dest_entities[2];
+ re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
+ int ndest, dest_nodes[2];
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -1;
/* Pick up valid destinations. */
- for (ndest = 0, i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
+ for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
{
- int candidate = mctx->state_log[*pidx]->nodes.elems[i];
- int entity;
- entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE)
- ? dfa->nodes[candidate].opr.ctx_info->entity : candidate);
- if (!re_node_set_contains (dfa->edests + node, entity))
+ int candidate = dfa->edests[node].elems[i];
+ if (!re_node_set_contains (cur_nodes, candidate))
continue;
dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
- dest_entities[0] = (ndest == 0) ? entity : dest_entities[0];
dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
- dest_entities[1] = (ndest == 1) ? entity : dest_entities[1];
++ndest;
}
if (ndest <= 1)
return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
- if (dest_entities[0] > dest_entities[1])
- {
- int swap_work = dest_nodes[0];
- dest_nodes[0] = dest_nodes[1];
- dest_nodes[1] = swap_work;
- }
/* In order to avoid infinite loop like "(a*)*". */
if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
return dest_nodes[1];
@@ -1050,22 +1035,17 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
}
else
{
- int naccepted = 0, entity = node;
+ int naccepted = 0;
re_token_type_t type = dfa->nodes[node].type;
- if (type == OP_CONTEXT_NODE)
- {
- entity = dfa->nodes[node].opr.ctx_info->entity;
- type = dfa->nodes[entity].type;
- }
#ifdef RE_ENABLE_I18N
if (ACCEPT_MB_NODE (type))
- naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
+ naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
else
#endif /* RE_ENABLE_I18N */
if (type == OP_BACK_REF)
{
- int subexp_idx = dfa->nodes[entity].opr.idx;
+ int subexp_idx = dfa->nodes[node].opr.idx;
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
if (fs != NULL)
{
@@ -1085,18 +1065,10 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -2;
- dest_node = dfa->nexts[node];
+ dest_node = dfa->edests[node].elems[0];
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node))
return dest_node;
- for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
- {
- dest_node = mctx->state_log[*pidx]->nodes.elems[i];
- if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
- && (dfa->nexts[node]
- == dfa->nodes[dest_node].opr.ctx_info->entity)))
- return dest_node;
- }
}
}
@@ -1153,6 +1125,7 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
*pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes);
+ re_free (fs->stack[num].regs);
*eps_via_nodes = fs->stack[num].eps_via_nodes;
return fs->stack[num].node;
}
@@ -1201,12 +1174,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
break;
if (reg_idx == nmatch)
- return REG_NOERROR;
+ {
+ re_node_set_free (&eps_via_nodes);
+ return free_fail_stack_return (fs);
+ }
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes);
}
else
- return REG_NOERROR;
+ {
+ re_node_set_free (&eps_via_nodes);
+ return REG_NOERROR;
+ }
}
/* Proceed to next node. */
@@ -1221,10 +1200,30 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes);
else
- return REG_NOMATCH;
+ {
+ re_node_set_free (&eps_via_nodes);
+ return REG_NOMATCH;
+ }
}
}
re_node_set_free (&eps_via_nodes);
+ return free_fail_stack_return (fs);
+}
+
+static reg_errcode_t
+free_fail_stack_return (fs)
+ struct re_fail_stack_t *fs;
+{
+ if (fs)
+ {
+ int fs_idx;
+ for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
+ {
+ re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
+ re_free (fs->stack[fs_idx].regs);
+ }
+ re_free (fs->stack);
+ }
return REG_NOERROR;
}
@@ -1314,6 +1313,7 @@ sift_states_backward (preg, mctx, sctx)
{
memset (sctx->sifted_states, '\0',
sizeof (re_dfastate_t *) * str_idx);
+ re_node_set_free (&cur_dest);
return REG_NOERROR;
}
re_node_set_empty (&cur_dest);
@@ -1330,22 +1330,16 @@ sift_states_backward (preg, mctx, sctx)
for (i = 0; i < cur_src->nelem; i++)
{
int prev_node = cur_src->elems[i];
- int entity = prev_node;
int naccepted = 0;
re_token_type_t type = dfa->nodes[prev_node].type;
if (IS_EPSILON_NODE(type))
continue;
- if (type == OP_CONTEXT_NODE)
- {
- entity = dfa->nodes[prev_node].opr.ctx_info->entity;
- type = dfa->nodes[entity].type;
- }
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
if (ACCEPT_MB_NODE (type))
- naccepted = sift_states_iter_mb (preg, mctx, sctx, entity, str_idx,
- sctx->last_str_idx);
+ naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
#endif /* RE_ENABLE_I18N */
/* We don't check backreferences here.
@@ -1459,12 +1453,15 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
/* At first, add the nodes which can epsilon transit to a node in
DEST_NODE. */
- err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ if (dest_nodes->nelem)
+ {
+ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
/* Then, check the limitations in the current sift_context. */
- if (sctx->limits.nelem)
+ if (dest_nodes->nelem && sctx->limits.nelem)
{
err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
mctx->bkref_ents, str_idx);
@@ -1479,7 +1476,7 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
/* If we are searching for the subexpression candidates.
Note that we were from transit_state_bkref_loop() in this case. */
- if (sctx->check_subexp)
+ if (dest_nodes->nelem && sctx->check_subexp)
{
err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
if (BE (err != REG_NOERROR, 0))
@@ -1538,7 +1535,7 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
int cur_node = inv_eclosure->elems[ecl_idx];
if (cur_node == node)
continue;
- if (dfa->edests[cur_node].nelem)
+ if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
{
int edst1 = dfa->edests[cur_node].elems[0];
int edst2 = ((dfa->edests[cur_node].nelem > 1)
@@ -1580,12 +1577,10 @@ check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
{
- int bkref, subexp_idx/*, node_idx, cls_node*/;
+ int subexp_idx;
struct re_backref_cache_entry *ent;
ent = mctx->bkref_ents + limits->elems[lim_idx];
- bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
- ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
- subexp_idx = dfa->nodes[bkref].opr.idx - 1;
+ subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
dfa->eclosures + dst_node,
@@ -1624,13 +1619,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
{
int node = eclosures->elems[node_idx];
- int entity = node;
re_token_type_t type= dfa->nodes[node].type;
- if (type == OP_CONTEXT_NODE)
- {
- entity = dfa->nodes[node].opr.ctx_info->entity;
- type = dfa->nodes[entity].type;
- }
if (type == OP_BACK_REF)
{
int bi;
@@ -1641,7 +1630,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
&& ent->str_idx == str_idx)
{
int cpos, dst;
- dst = dfa->nexts[node];
+ dst = dfa->edests[node].elems[0];
cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
dfa->eclosures + dst,
subexp_idx, dst,
@@ -1685,17 +1674,14 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
{
- int bkref, subexp_idx;
+ int subexp_idx;
struct re_backref_cache_entry *ent;
ent = bkref_ents + limits->elems[lim_idx];
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
continue; /* This is unrelated limitation. */
- bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
- ? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
- subexp_idx = dfa->nodes[bkref].opr.idx - 1;
-
+ subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
if (ent->subexp_to == str_idx)
{
int ops_node = -1;
@@ -1790,12 +1776,8 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{
re_token_type_t type;
- int entity;
node = dest_nodes->elems[node_idx];
type = dfa->nodes[node].type;
- entity = (type != OP_CONTEXT_NODE ? node
- : dfa->nodes[node].opr.ctx_info->entity);
- type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
if (type == OP_CLOSE_SUBEXP
&& sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
@@ -1933,14 +1915,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
{
- int entity;
int cur_bkref_idx = re_string_cur_idx (mctx->input);
re_token_type_t type;
node = candidates->elems[node_idx];
type = dfa->nodes[node].type;
- entity = (type != OP_CONTEXT_NODE ? node
- : dfa->nodes[node].opr.ctx_info->entity);
- type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
continue;
/* Avoid infinite loop for the REs like "()\1+". */
@@ -1951,37 +1929,25 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
int enabled_idx;
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
{
- int disabled_idx, subexp_len, to_idx;
+ int disabled_idx, subexp_len, to_idx, dst_node;
struct re_backref_cache_entry *entry;
entry = mctx->bkref_ents + enabled_idx;
subexp_len = entry->subexp_to - entry->subexp_from;
to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]);
if (entry->node != node || entry->str_idx != str_idx
|| to_idx > sctx->last_str_idx
|| sctx->sifted_states[to_idx] == NULL)
continue;
- if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
- dfa->nexts[node]))
- {
- int dst_idx;
- re_node_set *dsts = &sctx->sifted_states[to_idx]->nodes;
- for (dst_idx = 0; dst_idx < dsts->nelem; ++dst_idx)
- {
- int dst_node = dsts->elems[dst_idx];
- if (dfa->nodes[dst_node].type == OP_CONTEXT_NODE
- && (dfa->nodes[dst_node].opr.ctx_info->entity
- == dfa->nexts[node]))
- break;
- }
- if (dst_idx == dsts->nelem)
- continue;
- }
+ if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node))
+ continue;
if (check_dst_limits (dfa, &sctx->limits, mctx, node,
str_idx, dfa->nexts[node], to_idx))
continue;
- if (sctx->check_subexp == dfa->nodes[entity].opr.idx)
+ if (sctx->check_subexp == dfa->nodes[node].opr.idx)
{
char *buf;
buf = (char *) re_string_get_buffer (mctx->input);
@@ -2038,9 +2004,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
return err;
}
local_sctx.sifted_states[str_idx] = cur_state;
- re_node_set_remove_at (&local_sctx.limits,
- local_sctx.limits.nelem - 1);
- entry->flag = 1;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+ /* We must not use the variable entry here, since
+ mctx->bkref_ents might be realloced. */
+ mctx->bkref_ents[enabled_idx].flag = 1;
}
}
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
@@ -2301,15 +2268,14 @@ transit_state_mb (preg, pstate, mctx)
unsigned int context;
re_dfastate_t *dest_state;
- if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
+ if (dfa->nodes[cur_node_idx].constraint)
{
context = re_string_context_at (mctx->input,
re_string_cur_idx (mctx->input),
mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
- context))
+ context))
continue;
- cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity;
}
/* How many bytes the node can accepts? */
@@ -2404,17 +2370,16 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
/* Check whether `node' is a backreference or not. */
if (node->type == OP_BACK_REF)
subexp_idx = node->opr.idx;
- else if (node->type == OP_CONTEXT_NODE &&
- dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
+ else
+ continue;
+
+ if (node->constraint)
{
context = re_string_context_at (mctx->input, cur_str_idx,
mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
continue;
- subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
}
- else
- continue;
/* `node' is a backreference.
Check the substring which the substring matched. */
@@ -2440,8 +2405,8 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
continue;
subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
- new_dest_nodes = ((node->type == OP_CONTEXT_NODE && subexp_len == 0)
- ? dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure
+ new_dest_nodes = (subexp_len == 0
+ ? dfa->eclosures + dfa->edests[node_idx].elems[0]
: dfa->eclosures + dfa->nexts[node_idx]);
dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
- bkref_ent->subexp_from);
@@ -2688,16 +2653,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
/* For all the nodes belonging to `state', */
for (i = 0; i < cur_nodes->nelem; ++i)
{
- unsigned int constraint = 0;
re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
re_token_type_t type = node->type;
-
- if (type == OP_CONTEXT_NODE)
- {
- constraint = node->constraint;
- node = dfa->nodes + node->opr.ctx_info->entity;
- type = node->type;
- }
+ unsigned int constraint = node->constraint;
/* Enumerate all single byte character this node can accept. */
if (type == CHARACTER)
@@ -2724,10 +2682,10 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
if (constraint & NEXT_WORD_CONSTRAINT)
for (j = 0; j < BITSET_UINTS; ++j)
accepts[j] &= dfa->word_char[j];
- else if (constraint & NEXT_NOTWORD_CONSTRAINT)
+ if (constraint & NEXT_NOTWORD_CONSTRAINT)
for (j = 0; j < BITSET_UINTS; ++j)
accepts[j] &= ~dfa->word_char[j];
- else if (constraint & NEXT_NEWLINE_CONSTRAINT)
+ if (constraint & NEXT_NEWLINE_CONSTRAINT)
{
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
bitset_empty (accepts);
@@ -3058,10 +3016,8 @@ check_node_accept (preg, node, mctx, idx)
const re_match_context_t *mctx;
int idx;
{
- const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- const re_token_t *cur_node;
unsigned char ch;
- if (node->type == OP_CONTEXT_NODE)
+ if (node->constraint)
{
/* The node has constraints. Check whether the current context
satisfies the constraints. */
@@ -3070,17 +3026,13 @@ check_node_accept (preg, node, mctx, idx)
preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
return 0;
- cur_node = dfa->nodes + node->opr.ctx_info->entity;
}
- else
- cur_node = node;
-
ch = re_string_byte_at (mctx->input, idx);
- if (cur_node->type == CHARACTER)
- return cur_node->opr.c == ch;
- else if (cur_node->type == SIMPLE_BRACKET)
- return bitset_contain (cur_node->opr.sbcset, ch);
- else if (cur_node->type == OP_PERIOD)
+ if (node->type == CHARACTER)
+ return node->opr.c == ch;
+ else if (node->type == SIMPLE_BRACKET)
+ return bitset_contain (node->opr.sbcset, ch);
+ else if (node->type == OP_PERIOD)
return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
|| (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
else
@@ -3221,5 +3173,7 @@ sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
sctx->last_node = last_node;
sctx->last_str_idx = last_str_idx;
sctx->check_subexp = check_subexp;
+ sctx->cur_bkref = -1;
+ sctx->cls_subexp_idx = -1;
re_node_set_init_empty (&sctx->limits);
}