aboutsummaryrefslogtreecommitdiff
path: root/posix/regex_internal.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2005-09-07 01:15:33 +0000
committerUlrich Drepper <drepper@redhat.com>2005-09-07 01:15:33 +0000
commit01ed6ceb7c440f0695726463ee9ee307921ea97e (patch)
treeeda9aef2d63fd0c0f39e51208e7bb9d463d65661 /posix/regex_internal.c
parent2d87db5b5341bd6b714f175c1c268b7136444a71 (diff)
downloadglibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.zip
glibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.tar.gz
glibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.tar.bz2
* posix/regex_internal.c (re_string_reconstruct): Avoid calling
mbrtowc for very simple UTF-8 case. 2005-09-01 Paul Eggert <eggert@cs.ucla.edu> * posix/regex_internal.c (build_wcs_upper_buffer): Fix portability bugs in int versus size_t comparisons. 2005-09-06 Ulrich Drepper <drepper@redhat.com> * posix/regex_internal.c (re_acquire_state): Make DFA pointer arg a pointer-to-const. (re_acquire_state_context): Likewise. * posix/regex_internal.h: Adjust prototypes. 2005-08-31 Jim Meyering <jim@meyering.net> * posix/regcomp.c (search_duplicated_node): Make first pointer arg a pointer-to-const. * posix/regex_internal.c (create_ci_newstate, create_cd_newstate, register_state): Likewise. * posix/regexec.c (search_cur_bkref_entry, check_dst_limits): (check_dst_limits_calc_pos_1, check_dst_limits_calc_pos): (group_nodes_into_DFAstates): Likewise. * posix/regexec.c (re_search_internal): Simplify update of rm_so and rm_eo by replacing "if (A == B) A += C - B;" with the equivalent of "if (A == B) A = C;". 2005-09-06 Ulrich Drepper <drepper@redhat.com> * posix/regcomp.c (re_compile_internal): Change third parameter type to size_t. (init_dfa): Likewise. Make sure that arithmetic on pat_len doesn't overflow. * posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc and nodes_len to size_t. * posix/regex_internal.c (re_dfa_add_node): Use size_t as type for new_nodes_alloc. Check for overflow. 2005-08-31 Paul Eggert <eggert@cs.ucla.edu> * posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char): (optimize_subexps, lower_subexp): Don't assume 1<<31 has defined behavior on hosts with 32-bit int, since the signed shift might overflow. Use 1u<<31 instead. * posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise. * posix/regexec.c (check_dst_limits_calc_pos_1): Likewise. (check_subexp_matching_top): Likewise. * posix/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT rather than 8, for clarity. * posix/regexec.c (check_dst_limits_calc_pos_1): (check_subexp_matching_top): Likewise. * posix/regcomp.c (init_dfa): Make table_size unsigned, so that we don't have to worry about portability issues when shifting it left. Remove no-longer-needed test for table_size > 0. * posix/regcomp.c (parse_sub_exp): Do not shift more bits than there are in a word, as the resulting behavior is undefined. * posix/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one case, a <= should have been an <, and in another case the whole test was missing. * posix/regex_internal.h (BYTE_BITS): Remove. All uses changed to the standard name CHAR_BIT.
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r--posix/regex_internal.c120
1 files changed, 69 insertions, 51 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 821ed7f..240e887 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len,
static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
wint_t *last_wc) internal_function;
#endif /* RE_ENABLE_I18N */
-static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+static reg_errcode_t register_state (const re_dfa_t *dfa,
+ re_dfastate_t *newstate,
unsigned int hash) internal_function;
-static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash) internal_function;
-static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context,
unsigned int hash) internal_function;
@@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags)
byte other than 0x80 - 0xbf. */
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
end = raw + (offset - pstr->mb_cur_max);
- for (p = raw + offset - 1; p >= end; --p)
- if ((*p & 0xc0) != 0x80)
- {
- mbstate_t cur_state;
- wchar_t wc2;
- int mlen = raw + pstr->len - p;
- unsigned char buf[6];
-
- q = p;
- if (BE (pstr->trans != NULL, 0))
- {
- int i = mlen < 6 ? mlen : 6;
- while (--i >= 0)
- buf[i] = pstr->trans[p[i]];
- q = buf;
- }
- /* XXX Don't use mbrtowc, we know which conversion
- to use (UTF-8 -> UCS4). */
- memset (&cur_state, 0, sizeof (cur_state));
- mlen = (mbrtowc (&wc2, (const char *) p, mlen,
- &cur_state)
- - (raw + offset - p));
- if (mlen >= 0)
- {
- memset (&pstr->cur_state, '\0',
- sizeof (mbstate_t));
- pstr->valid_len = mlen;
- wc = wc2;
- }
- break;
- }
+ p = raw + offset - 1;
+#ifdef _LIBC
+ /* We know the wchar_t encoding is UCS4, so for the simple
+ case, ASCII characters, skip the conversion step. */
+ if (isascii (*p) && BE (pstr->trans == NULL, 1))
+ {
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ pstr->valid_len = 0;
+ wc = (wchar_t) *p;
+ }
+ else
+#endif
+ for (; p >= end; --p)
+ if ((*p & 0xc0) != 0x80)
+ {
+ mbstate_t cur_state;
+ wchar_t wc2;
+ int mlen = raw + pstr->len - p;
+ unsigned char buf[6];
+ size_t mbclen;
+
+ q = p;
+ if (BE (pstr->trans != NULL, 0))
+ {
+ int i = mlen < 6 ? mlen : 6;
+ while (--i >= 0)
+ buf[i] = pstr->trans[p[i]];
+ q = buf;
+ }
+ /* XXX Don't use mbrtowc, we know which conversion
+ to use (UTF-8 -> UCS4). */
+ memset (&cur_state, 0, sizeof (cur_state));
+ mbclen = mbrtowc (&wc2, (const char *) p, mlen,
+ &cur_state);
+ if (raw + offset - p <= mbclen
+ && mbclen < (size_t) -2)
+ {
+ memset (&pstr->cur_state, '\0',
+ sizeof (mbstate_t));
+ pstr->valid_len = mbclen - (raw + offset - p);
+ wc = wc2;
+ }
+ break;
+ }
}
if (wc == WEOF)
@@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags)
}
else
#endif /* RE_ENABLE_I18N */
- if (BE (pstr->mbs_allocated, 0))
- {
- if (pstr->icase)
- build_upper_buffer (pstr);
- else if (pstr->trans != NULL)
- re_string_translate_buffer (pstr);
- }
- else
- pstr->valid_len = pstr->len;
+ if (BE (pstr->mbs_allocated, 0))
+ {
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
+ }
+ else
+ pstr->valid_len = pstr->len;
pstr->cur_idx = 0;
return REG_NOERROR;
@@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token)
int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
- int new_nodes_alloc = dfa->nodes_alloc * 2;
+ size_t new_nodes_alloc = dfa->nodes_alloc * 2;
int *new_nexts, *new_indices;
re_node_set *new_edests, *new_eclosures;
+ re_token_t *new_nodes;
+
+ /* Avoid overflows. */
+ if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
+ return -1;
- re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
- new_nodes_alloc);
+ new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
if (BE (new_nodes == NULL, 0))
return -1;
dfa->nodes = new_nodes;
@@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context)
static re_dfastate_t*
re_acquire_state (err, dfa, nodes)
reg_errcode_t *err;
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
const re_node_set *nodes;
{
unsigned int hash;
@@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes)
static re_dfastate_t*
re_acquire_state_context (err, dfa, nodes, context)
reg_errcode_t *err;
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context;
{
@@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context)
static reg_errcode_t
register_state (dfa, newstate, hash)
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
re_dfastate_t *newstate;
unsigned int hash;
{
@@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash)
static re_dfastate_t *
create_ci_newstate (dfa, nodes, hash)
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int hash;
{
@@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash)
static re_dfastate_t *
create_cd_newstate (dfa, nodes, context, hash)
- re_dfa_t *dfa;
+ const re_dfa_t *dfa;
const re_node_set *nodes;
unsigned int context, hash;
{