aboutsummaryrefslogtreecommitdiff
path: root/posix/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'posix/regcomp.c')
-rw-r--r--posix/regcomp.c240
1 files changed, 142 insertions, 98 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 12da043..0b85f7d 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
const re_dfastate_t *init_state,
char *fastmap);
static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
-static void init_word_char (re_dfa_t *dfa);
+static reg_errcode_t init_word_char (re_dfa_t *dfa);
static void free_charset (re_charset_t *cset);
static void free_workarea_compile (regex_t *preg);
static reg_errcode_t create_initial_state (re_dfa_t *dfa);
@@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
-static int duplicate_node (re_dfa_t *dfa, int org_idx,
- unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+ unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
-static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+ int node, int root);
static void calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
@@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
if (ret == REG_ERPAREN)
ret = REG_EPAREN;
- /* XXX Why the test for preg->fastmap != NULL? */
- if (ret == REG_NOERROR && preg->fastmap != NULL)
+ /* We have already checked preg->fastmap != NULL. */
+ if (ret == REG_NOERROR)
{
/* Compute the fastmap now, since regexec cannot modify the pattern
buffer. */
@@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
"word". In this case "word" means that it is the word construction
character used by some operators like "\<", "\>", etc. */
-static void
+static reg_errcode_t
init_word_char (dfa)
re_dfa_t *dfa;
{
int i, j, ch;
dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+ if (dfa->word_char == NULL)
+ return REG_ESPACE;
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
for (j = 0; j < UINT_BITS; ++j, ++ch)
if (isalnum (ch) || ch == '_')
dfa->word_char[i] |= 1 << j;
+ return REG_NOERROR;
}
/* Free the work area which are only used while compiling. */
@@ -844,24 +848,28 @@ create_initial_state (dfa)
}
/* It must be the first time to invoke acquire_state. */
- dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0);
+ dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+ /* We don't check ERR here, since the initial state must not be NULL. */
+ if (dfa->init_state == NULL)
+ return err;
if (dfa->init_state->has_constraint)
{
- dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_WORD);
- dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
CONTEXT_NEWLINE);
- dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes,
+ dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+ &init_nodes,
CONTEXT_NEWLINE
| CONTEXT_BEGBUF);
+ if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL)
+ return err;
}
else
dfa->init_state_word = dfa->init_state_nl
= dfa->init_state_begbuf = dfa->init_state;
- if (dfa->init_state == NULL || dfa->init_state_word == NULL
- || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
- return REG_ESPACE;
re_node_set_free (&init_nodes);
return REG_NOERROR;
}
@@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
}
}
-static int
-duplicate_node (dfa, org_idx, constraint)
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+ The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
+ otherwise return the error code. */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
re_dfa_t *dfa;
- int org_idx;
+ int *new_idx, org_idx;
unsigned int constraint;
{
re_token_t dup;
int dup_idx;
+ reg_errcode_t err;
dup.type = OP_CONTEXT_NODE;
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
{
+ /* If the node whose index is ORG_IDX is the same as the intended
+ node, use it. */
if (dfa->nodes[org_idx].constraint == constraint)
- return org_idx;
+ {
+ *new_idx = org_idx;
+ return REG_NOERROR;
+ }
dup.constraint = constraint |
dfa->nodes[org_idx].constraint;
}
@@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
/* In case that `entity' points OP_CONTEXT_NODE,
we correct `entity' to real entity in calc_inveclosures(). */
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
+ dup_idx = re_dfa_add_node (dfa, dup, 1);
+ if (dup.opr.ctx_info == NULL || dup_idx == -1)
+ return REG_ESPACE;
dup.opr.ctx_info->entity = org_idx;
dup.opr.ctx_info->bkref_eclosure = NULL;
- dup_idx = re_dfa_add_node (dfa, dup, 1);
- dfa->nodes[dup_idx].duplicated = 1;
+ dfa->nodes[dup_idx].duplicated = 1;
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
- re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+ err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+ if (err != REG_NOERROR)
+ return err;
/* Since we don't duplicate epsilon nodes, epsilon closure have
only itself. */
- re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
- re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+ err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
+ if (err != REG_NOERROR)
+ return err;
+ err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+ if (err != REG_NOERROR)
+ return err;
/* Then we must update inveclosure for this node.
We process them at last part of calc_eclosure(),
since we don't complete to calculate them here. */
- return dup_idx;
+ *new_idx = dup_idx;
+ return REG_NOERROR;
}
static void
@@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
/* For each nodes, calculate epsilon closure. */
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
{
+ reg_errcode_t err;
re_node_set eclosure_elem;
if (node_idx == max)
{
@@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
if (dfa->eclosures[node_idx].nelem != 0)
continue;
/* Calculate epsilon closure of `node_idx'. */
- eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1);
+ err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+ if (err != REG_NOERROR)
+ return err;
if (dfa->eclosures[node_idx].nelem == 0)
{
@@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
{
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
- dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint);
+ {
+ reg_errcode_t err;
+ err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
+ constraint);
+ if (err != REG_NOERROR)
+ return err;
+ }
re_node_set_insert (bkref_eclosure, dest_node_idx);
}
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
@@ -1252,15 +1288,19 @@ calc_eclosure (dfa)
/* Calculate epsilon closure of NODE. */
-static re_node_set
-calc_eclosure_iter (dfa, node, root)
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+ re_node_set *new_set;
re_dfa_t *dfa;
int node, root;
{
+ reg_errcode_t err;
unsigned int constraint;
int i, max, incomplete = 0;
re_node_set eclosure;
- re_node_set_alloc (&eclosure, 1);
+ err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+ if (err != REG_NOERROR)
+ return err;
/* This indicates that we are calculating this node now.
We reference this value to avoid infinite loop. */
@@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
/* If we haven't calculated the epsilon closure of `edest' yet,
calculate now. Otherwise use calculated epsilon closure. */
if (dfa->eclosures[edest].nelem == 0)
- eclosure_elem = calc_eclosure_iter (dfa, edest, 0);
+ {
+ err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+ if (err != REG_NOERROR)
+ return err;
+ }
else
eclosure_elem = dfa->eclosures[edest];
/* Merge the epsilon closure of `edest'. */
@@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
int dest = eclosure.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
{
- int dup_dest = duplicate_node (dfa, dest, constraint);
+ int dup_dest;
+ reg_errcode_t err;
+ err = duplicate_node (&dup_dest, dfa, dest, constraint);
+ if (err != REG_NOERROR)
+ return err;
if (dest != dup_dest)
{
re_node_set_remove_at (&eclosure, i--);
@@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
dfa->eclosures[node].nelem = 0;
else
dfa->eclosures[node] = eclosure;
- return eclosure;
+ *new_set = eclosure;
+ return REG_NOERROR;
}
/* Functions for token which are used in the parser. */
@@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break;
case ANCHOR:
if (dfa->word_char == NULL)
- init_word_char (dfa);
+ {
+ *err = init_word_char (dfa);
+ if (*err != REG_NOERROR)
+ return NULL;
+ }
if (token->opr.ctx_type == WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
@@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
I'm not sure, but maybe enough. */
#define BRACKET_NAME_BUF_SIZE 32
-static inline void *
-extend_array_for_cset (array, num, alloc, type_size)
- void *array;
- int num, *alloc, type_size;
-{
- void *new_array = array;
- if (*alloc == num)
- {
- if (*alloc == 0)
- {
- new_array = malloc (type_size);
- *alloc = 1;
- }
- else
- {
- new_array = realloc (array, type_size * num * 2);
- *alloc = 2 * num;
- }
- }
- return new_array;
-}
-
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
@@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
uint32_t *new_array_end;
int new_nranges;
- /* XXX If mbcset->range_starts and mbcset->range_ends are NULL
- if *range_alloc == 0 then we do not need the if. */
- if (*range_alloc == 0)
- {
- new_nranges = 1;
- new_array_start = re_malloc (uint32_t, 1);
- new_array_end = re_malloc (uint32_t, 1);
- }
- else
- {
- new_nranges = 2 * mbcset->nranges;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
- }
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ /* Use realloc since mbcset->range_starts and mbcset->range_ends
+ are NULL if *range_alloc == 0. */
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
if (new_array_start == NULL || new_array_end == NULL)
return REG_ESPACE;
@@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Got valid collation sequence, add it as a new entry. */
/* Check the space of the arrays. */
- mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms,
- mbcset->ncoll_syms,
- coll_sym_alloc,
- sizeof (int32_t));
- if (mbcset->coll_syms == NULL)
- return REG_ESPACE;
-
+ if (*coll_sym_alloc == mbcset->ncoll_syms)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ *coll_sym_alloc);
+ if (mbcset->coll_syms == NULL)
+ return REG_ESPACE;
+ }
mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
return REG_NOERROR;
}
@@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
bitset_set (sbcset, start_elem.opr.ch);
break;
case MB_CHAR:
- mbcset->mbchars = extend_array_for_cset (mbcset->mbchars,
- mbcset->nmbchars,
- &mbchar_alloc,
- sizeof (wchar_t));
- if (mbcset->mbchars == NULL)
- goto parse_bracket_exp_espace;
+ /* Check whether the array has enough space. */
+ if (mbchar_alloc == mbcset->nmbchars)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nmbchars is 0. */
+ mbchar_alloc = 2 * mbcset->nmbchars + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
+ mbchar_alloc);
+ if (mbcset->mbchars == NULL)
+ goto parse_bracket_exp_espace;
+ }
mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
break;
case EQUIV_CLASS:
@@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
bitset_set (sbcset, ch);
}
}
- /* Check the space of the arrays, and extend if we need. */
- mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes,
- mbcset->nequiv_classes,
- equiv_class_alloc,
- sizeof (int32_t));
- if (mbcset->equiv_classes == NULL)
- return REG_ESPACE;
-
+ /* Check whether the array has enough space. */
+ if (*equiv_class_alloc == mbcset->nequiv_classes)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nequiv_classes is 0. */
+ *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+ /* Use realloc since the array is NULL if *alloc == 0. */
+ mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
+ *equiv_class_alloc);
+ if (mbcset->equiv_classes == NULL)
+ return REG_ESPACE;
+ }
mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
}
else
@@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
int i;
/* Check the space of the arrays. */
- mbcset->char_classes = extend_array_for_cset (mbcset->char_classes,
- mbcset->nchar_classes,
- char_class_alloc,
- sizeof (wctype_t));
- if (mbcset->char_classes == NULL)
- return REG_ESPACE;
+ if (*char_class_alloc == mbcset->nchar_classes)
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->nchar_classes is 0. */
+ *char_class_alloc = 2 * mbcset->nchar_classes + 1;
+ /* Use realloc since array is NULL if *alloc == 0. */
+ mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
+ *char_class_alloc);
+ if (mbcset->char_classes == NULL)
+ return REG_ESPACE;
+ }
mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);