aboutsummaryrefslogtreecommitdiff
path: root/posix/regex_internal.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-04-24 21:54:53 +0000
committerUlrich Drepper <drepper@redhat.com>2002-04-24 21:54:53 +0000
commit612546c60dd28d7af44fbb2bc98c69c33b4a0c49 (patch)
tree11dc6bf94bd7beb3271366b33a86be162b3bfa6e /posix/regex_internal.c
parentbe479a6dfe81c5b426e2cbabd62be2c042ab2d45 (diff)
downloadglibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.zip
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.gz
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.bz2
Update.
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible <bruno@clisp.org> * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check.
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r--posix/regex_internal.c511
1 files changed, 321 insertions, 190 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 8b16dd6..b688d0f 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -58,14 +58,9 @@
#include "regex_internal.h"
static void re_string_construct_common (const unsigned char *str,
- int len, re_string_t *pstr);
-#ifdef RE_ENABLE_I18N
-static reg_errcode_t build_wcs_buffer (re_string_t *pstr);
-static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
-#endif /* RE_ENABLE_I18N */
-static reg_errcode_t build_upper_buffer (re_string_t *pstr);
-static reg_errcode_t re_string_translate_buffer (re_string_t *pstr,
- RE_TRANSLATE_TYPE trans);
+ int len, re_string_t *pstr,
+ RE_TRANSLATE_TYPE trans, int icase);
+static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx);
static re_dfastate_t *create_newstate_common (re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int hash);
@@ -83,278 +78,416 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes,
/* Functions for string operation. */
-/* Construct string object. */
+/* This function allocate the buffers. It is necessary to call
+ re_string_reconstruct before using the object. */
+
static reg_errcode_t
-re_string_construct (pstr, str, len, trans)
+re_string_allocate (pstr, str, len, init_len, trans, icase)
re_string_t *pstr;
const unsigned char *str;
- int len;
+ int len, init_len, icase;
RE_TRANSLATE_TYPE trans;
{
reg_errcode_t ret;
- re_string_construct_common (str, len, pstr);
-#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX >1 && pstr->len > 0)
+ int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+ re_string_construct_common (str, len, pstr, trans, icase);
+
+ ret = re_string_realloc_buffers (pstr, init_buf_len);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+
+ pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+ : (unsigned char *)str);
+ pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
+ pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
+ || MB_CUR_MAX > 1) ? pstr->valid_len : len;
+ return REG_NOERROR;
+}
+
+/* This function allocate the buffers, and initialize them. */
+
+static reg_errcode_t
+re_string_construct (pstr, str, len, trans, icase)
+ re_string_t *pstr;
+ const unsigned char *str;
+ int len, icase;
+ RE_TRANSLATE_TYPE trans;
+{
+ reg_errcode_t ret;
+ re_string_construct_common (str, len, pstr, trans, icase);
+ /* Set 0 so that this function can initialize whole buffers. */
+ pstr->valid_len = 0;
+
+ if (len > 0)
{
- ret = build_wcs_buffer (pstr);
+ ret = re_string_realloc_buffers (pstr, len + 1);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
+ pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
+ : (unsigned char *)str);
+ pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
+
+ if (icase)
+ {
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_upper_buffer (pstr);
+ else
+ build_upper_buffer (pstr);
#endif /* RE_ENABLE_I18N */
- pstr->mbs_case = str;
- if (trans != NULL)
+ }
+ else
{
- ret = re_string_translate_buffer (pstr, trans);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
+ build_wcs_buffer (pstr);
+ else
+#endif /* RE_ENABLE_I18N */
+ {
+ if (trans != NULL)
+ re_string_translate_buffer (pstr);
+ else
+ pstr->valid_len = len;
+ }
}
+
+ /* Initialized whole buffers, then valid_len == bufs_len. */
+ pstr->valid_len = pstr->bufs_len;
return REG_NOERROR;
}
-/* Construct string object. We use this function instead of
- re_string_construct for case insensitive mode. */
+/* Helper functions for re_string_allocate, and re_string_construct. */
static reg_errcode_t
-re_string_construct_toupper (pstr, str, len, trans)
+re_string_realloc_buffers (pstr, new_buf_len)
re_string_t *pstr;
- const unsigned char *str;
- int len;
- RE_TRANSLATE_TYPE trans;
+ int new_buf_len;
{
- reg_errcode_t ret;
- /* Set case sensitive buffer. */
- re_string_construct_common (str, len, pstr);
#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX >1)
+ if (MB_CUR_MAX > 1)
{
- if (BE (pstr->len > 0, 1))
- {
- ret = build_wcs_upper_buffer (pstr);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- }
+ pstr->wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len);
+ if (BE (pstr->wcs == NULL, 0))
+ return REG_ESPACE;
}
- else
#endif /* RE_ENABLE_I18N */
+ if (MBS_ALLOCATED (pstr))
{
- if (BE (pstr->len > 0, 1))
- {
- ret = build_upper_buffer (pstr);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- }
+ pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
+ if (BE (pstr->mbs == NULL, 0))
+ return REG_ESPACE;
}
- pstr->mbs_case = str;
- if (trans != NULL)
+ if (MBS_CASE_ALLOCATED (pstr))
{
- ret = re_string_translate_buffer (pstr, trans);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
+ pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
+ if (BE (pstr->mbs_case == NULL, 0))
+ return REG_ESPACE;
+ if (!MBS_ALLOCATED (pstr))
+ pstr->mbs = pstr->mbs_case;
}
+ pstr->bufs_len = new_buf_len;
return REG_NOERROR;
}
-/* Helper functions for re_string_construct_*. */
+
static void
-re_string_construct_common (str, len, pstr)
+re_string_construct_common (str, len, pstr, trans, icase)
const unsigned char *str;
int len;
re_string_t *pstr;
+ RE_TRANSLATE_TYPE trans;
+ int icase;
{
- pstr->mbs = str;
- pstr->cur_idx = 0;
+ memset (pstr, '\0', sizeof (re_string_t));
+ pstr->raw_mbs = str;
pstr->len = len;
-#ifdef RE_ENABLE_I18N
- pstr->wcs = NULL;
-#endif
- pstr->mbs_case = NULL;
- pstr->mbs_alloc = 0;
- pstr->mbs_case_alloc = 0;
+ pstr->trans = trans;
+ pstr->icase = icase ? 1 : 0;
}
#ifdef RE_ENABLE_I18N
-/* Build wide character buffer for `pstr'.
+/* Build wide character buffer PSTR->WCS.
If the byte sequence of the string are:
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
Then wide character buffer will be:
<wc1> , WEOF , <wc2> , WEOF , <wc3>
We use WEOF for padding, they indicate that the position isn't
- a first byte of a multibyte character. */
+ a first byte of a multibyte character.
-static reg_errcode_t
+ Note that this function assumes PSTR->VALID_LEN elements are already
+ built and starts from PSTR->VALID_LEN. */
+
+static void
build_wcs_buffer (pstr)
re_string_t *pstr;
{
- mbstate_t state, prev_st;
- wchar_t wc;
- int char_idx, char_len, mbclen;
-
- pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
- if (BE (pstr->wcs == NULL, 0))
- return REG_ESPACE;
-
- memset (&state, '\0', sizeof (mbstate_t));
- char_len = pstr->len;
- for (char_idx = 0; char_idx < char_len ;)
+ mbstate_t prev_st;
+ int byte_idx, end_idx, mbclen, remain_len;
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
- int next_idx, remain_len = char_len - char_idx;
- prev_st = state;
- mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
- if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
- /* We treat these cases as a singlebyte character. */
+ wchar_t wc;
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+ remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
+ {
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
+ }
+ else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
{
+ /* We treat these cases as a singlebyte character. */
mbclen = 1;
- wc = (wchar_t) pstr->mbs[char_idx++];
- state = prev_st;
+ wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ pstr->cur_state = prev_st;
+ }
+
+ /* Apply the translateion if we need. */
+ if (pstr->trans != NULL && mbclen == 1)
+ {
+ int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
+ pstr->mbs_case[byte_idx] = ch;
}
/* Write wide character and padding. */
- pstr->wcs[char_idx++] = wc;
- for (next_idx = char_idx + mbclen - 1; char_idx < next_idx ;)
- pstr->wcs[char_idx++] = WEOF;
+ pstr->wcs[byte_idx++] = wc;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
}
- return REG_NOERROR;
+ pstr->valid_len = byte_idx;
}
-static reg_errcode_t
+/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
+ but for REG_ICASE. */
+
+static void
build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
- mbstate_t state, prev_st;
- wchar_t wc;
- unsigned char *mbs_upper;
- int char_idx, char_len, mbclen;
-
- pstr->wcs = re_malloc (wchar_t, pstr->len + 1);
- mbs_upper = re_malloc (unsigned char, pstr->len + 1);
- if (BE (pstr->wcs == NULL || mbs_upper == NULL, 0))
+ mbstate_t prev_st;
+ int byte_idx, end_idx, mbclen, remain_len;
+ /* Build the buffers from pstr->valid_len to either pstr->len or
+ pstr->bufs_len. */
+ end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
+ for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
- pstr->wcs = NULL;
- return REG_ESPACE;
- }
-
- memset (&state, '\0', sizeof (mbstate_t));
- char_len = pstr->len;
- for (char_idx = 0 ; char_idx < char_len ; char_idx += mbclen)
- {
- int byte_idx, remain_len = char_len - char_idx;
- prev_st = state;
- mbclen = mbrtowc (&wc, pstr->mbs + char_idx, remain_len, &state);
- if (mbclen == 1)
+ wchar_t wc;
+ remain_len = end_idx - byte_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+ remain_len, &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2, 0))
{
- pstr->wcs[char_idx] = wc;
- if (islower (pstr->mbs[char_idx]))
- mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
- else
- mbs_upper[char_idx] = pstr->mbs[char_idx];
+ /* The buffer doesn't have enough space, finish to build. */
+ pstr->cur_state = prev_st;
+ break;
}
- else if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1
- || mbclen == 0, 0))
- /* We treat these cases as a singlebyte character. */
+ else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0)
{
- mbclen = 1;
- pstr->wcs[char_idx] = (wchar_t) pstr->mbs[char_idx];
- mbs_upper[char_idx] = pstr->mbs[char_idx];
- state = prev_st;
+ /* In case of a singlebyte character. */
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
+ /* Apply the translateion if we need. */
+ if (pstr->trans != NULL && mbclen == 1)
+ {
+ ch = pstr->trans[ch];
+ pstr->mbs_case[byte_idx] = ch;
+ }
+ pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc;
+ pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch;
+ if (BE (mbclen == (size_t) -1, 0))
+ pstr->cur_state = prev_st;
}
else /* mbclen > 1 */
{
- pstr->wcs[char_idx] = wc;
if (iswlower (wc))
- wcrtomb (mbs_upper + char_idx, towupper (wc), &prev_st);
+ wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
else
- memcpy (mbs_upper + char_idx, pstr->mbs + char_idx, mbclen);
- for (byte_idx = 1 ; byte_idx < mbclen ; byte_idx++)
- pstr->wcs[char_idx + byte_idx] = WEOF;
+ memcpy (pstr->mbs + byte_idx,
+ pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
+ pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc;
+ /* Write paddings. */
+ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
+ pstr->wcs[byte_idx++] = WEOF;
}
}
- pstr->mbs = mbs_upper;
- pstr->mbs_alloc = 1;
- return REG_NOERROR;
+ pstr->valid_len = byte_idx;
+}
+
+/* Skip characters until the index becomes greater than NEW_RAW_IDX.
+ Return the index. */
+
+static int
+re_string_skip_chars (pstr, new_raw_idx)
+ re_string_t *pstr;
+ int new_raw_idx;
+{
+ mbstate_t prev_st;
+ int rawbuf_idx, mbclen;
+
+ /* Skip the characters which are not necessary to check. */
+ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
+ rawbuf_idx < new_raw_idx;)
+ {
+ int remain_len = pstr->len - rawbuf_idx;
+ prev_st = pstr->cur_state;
+ mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
+ &pstr->cur_state);
+ if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
+ {
+ /* We treat these cases as a singlebyte character. */
+ mbclen = 1;
+ pstr->cur_state = prev_st;
+ }
+ /* Then proceed the next character. */
+ rawbuf_idx += mbclen;
+ }
+ return rawbuf_idx;
}
#endif /* RE_ENABLE_I18N */
-static reg_errcode_t
+/* Build the buffer PSTR->MBS, and apply the translation if we need.
+ This function is used in case of REG_ICASE. */
+
+static void
build_upper_buffer (pstr)
re_string_t *pstr;
{
- unsigned char *mbs_upper;
- int char_idx, char_len;
-
- mbs_upper = re_malloc (unsigned char, pstr->len + 1);
- if (BE (mbs_upper == NULL, 0))
- return REG_ESPACE;
+ int char_idx, end_idx;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
- char_len = pstr->len;
- for (char_idx = 0 ; char_idx < char_len ; char_idx ++)
+ for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
{
- if (islower (pstr->mbs[char_idx]))
- mbs_upper[char_idx] = toupper (pstr->mbs[char_idx]);
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
+ if (pstr->trans != NULL)
+ {
+ ch = pstr->trans[ch];
+ pstr->mbs_case[char_idx] = ch;
+ }
+ if (islower (ch))
+ pstr->mbs[char_idx] = toupper (ch);
else
- mbs_upper[char_idx] = pstr->mbs[char_idx];
+ pstr->mbs[char_idx] = ch;
}
- pstr->mbs = mbs_upper;
- pstr->mbs_alloc = 1;
- return REG_NOERROR;
+ pstr->valid_len = char_idx;
}
-/* Apply TRANS to the buffer in PSTR. We assume that wide char buffer
- is already constructed if MB_CUR_MAX > 1. */
+/* Apply TRANS to the buffer in PSTR. */
-static reg_errcode_t
-re_string_translate_buffer (pstr, trans)
+static void
+re_string_translate_buffer (pstr)
re_string_t *pstr;
- RE_TRANSLATE_TYPE trans;
{
- int buf_idx;
- unsigned char *transed_buf, *transed_case_buf;
-#ifdef DEBUG
- assert (trans != NULL);
-#endif
- if (pstr->mbs_alloc)
+ int buf_idx, end_idx;
+ end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
+
+ for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
{
- transed_buf = (unsigned char *) pstr->mbs;
- transed_case_buf = re_malloc (unsigned char, pstr->len + 1);
- if (BE (transed_case_buf == NULL, 0))
- return REG_ESPACE;
- pstr->mbs_case_alloc = 1;
+ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
+ pstr->mbs_case[buf_idx] = pstr->trans[ch];
}
- else
+
+ pstr->valid_len = buf_idx;
+}
+
+/* This function re-construct the buffers.
+ Concretely, convert to wide character in case of MB_CUR_MAX > 1,
+ convert to upper case in case of REG_ICASE, apply translation. */
+
+static reg_errcode_t
+re_string_reconstruct (pstr, idx, eflags, newline)
+ re_string_t *pstr;
+ int idx, eflags, newline;
+{
+ int offset = idx - pstr->raw_mbs_idx;
+ if (offset < 0)
{
- transed_buf = re_malloc (unsigned char, pstr->len + 1);
- if (BE (transed_buf == NULL, 0))
- return REG_ESPACE;
- transed_case_buf = NULL;
- pstr->mbs_alloc = 1;
+ /* Reset buffer. */
+ memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+ pstr->valid_len = pstr->raw_mbs_idx = 0;
+ pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
+ if (!MBS_CASE_ALLOCATED (pstr))
+ pstr->mbs_case = (unsigned char *)pstr->raw_mbs;
+ if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
+ pstr->mbs = (unsigned char *)pstr->raw_mbs;
+ offset = idx;
}
- for (buf_idx = 0 ; buf_idx < pstr->len ; buf_idx++)
+
+ if (offset != 0)
{
+ pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
+ newline);
+ /* Are the characters which are already checked remain? */
+ if (offset < pstr->valid_len)
+ {
+ /* Yes, move them to the front of the buffer. */
#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
- transed_buf[buf_idx] = pstr->mbs[buf_idx];
- else
+ if (MB_CUR_MAX > 1)
+ memmove (pstr->wcs, pstr->wcs + offset,
+ (pstr->valid_len - offset) * sizeof (wchar_t));
+#endif /* RE_ENABLE_I18N */
+ if (MBS_ALLOCATED (pstr))
+ memmove (pstr->mbs, pstr->mbs + offset,
+ pstr->valid_len - offset);
+ if (MBS_CASE_ALLOCATED (pstr))
+ memmove (pstr->mbs_case, pstr->mbs_case + offset,
+ pstr->valid_len - offset);
+ pstr->valid_len -= offset;
+#if DEBUG
+ assert (pstr->valid_len > 0);
#endif
- transed_buf[buf_idx] = trans[pstr->mbs[buf_idx]];
- if (transed_case_buf)
+ }
+ else
{
+ /* No, skip all characters until IDX. */
+ pstr->valid_len = 0;
#ifdef RE_ENABLE_I18N
- if (MB_CUR_MAX > 1 && !re_string_is_single_byte_char (pstr, buf_idx))
- transed_case_buf[buf_idx] = pstr->mbs_case[buf_idx];
- else
-#endif
- transed_case_buf[buf_idx] = trans[pstr->mbs_case[buf_idx]];
+ if (MB_CUR_MAX > 1)
+ {
+ int wcs_idx;
+ pstr->valid_len = re_string_skip_chars (pstr, idx) - idx;
+ for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+ pstr->wcs[wcs_idx] = WEOF;
+ }
+#endif /* RE_ENABLE_I18N */
+ }
+ if (!MBS_CASE_ALLOCATED (pstr))
+ {
+ pstr->mbs_case += offset;
+ /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
+ if (!MBS_ALLOCATED (pstr))
+ pstr->mbs += offset;
}
}
- if (pstr->mbs_case_alloc == 1)
+ pstr->raw_mbs_idx = idx;
+ pstr->len -= offset;
+
+ /* Then build the buffers. */
+#ifdef RE_ENABLE_I18N
+ if (MB_CUR_MAX > 1)
{
- pstr->mbs = transed_buf;
- pstr->mbs_case = transed_case_buf;
+ if (pstr->icase)
+ build_wcs_upper_buffer (pstr);
+ else
+ build_wcs_buffer (pstr);
}
else
+#endif /* RE_ENABLE_I18N */
{
- pstr->mbs = transed_buf;
- pstr->mbs_case = transed_buf;
+ if (pstr->icase)
+ build_upper_buffer (pstr);
+ else if (pstr->trans != NULL)
+ re_string_translate_buffer (pstr);
}
+ pstr->cur_idx = 0;
+
return REG_NOERROR;
}
@@ -365,13 +498,14 @@ re_string_destruct (pstr)
#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
#endif /* RE_ENABLE_I18N */
- if (pstr->mbs_alloc)
- re_free ((void *) pstr->mbs);
- if (pstr->mbs_case_alloc)
- re_free ((void *) pstr->mbs_case);
+ if (MBS_ALLOCATED (pstr))
+ re_free (pstr->mbs);
+ if (MBS_CASE_ALLOCATED (pstr))
+ re_free (pstr->mbs_case);
}
/* Return the context at IDX in INPUT. */
+
static unsigned int
re_string_context_at (input, idx, eflags, newline_anchor)
const re_string_t *input;
@@ -380,17 +514,13 @@ re_string_context_at (input, idx, eflags, newline_anchor)
int c;
if (idx < 0 || idx == input->len)
{
- unsigned int context = 0;
if (idx < 0)
- context = CONTEXT_BEGBUF;
+ /* In this case, we use the value stored in input->tip_context,
+ since we can't know the character in input->mbs[-1] here. */
+ return input->tip_context;
else /* (idx == input->len) */
- context = CONTEXT_ENDBUF;
-
- if ((idx < 0 && !(eflags & REG_NOTBOL))
- || (idx == input->len && !(eflags & REG_NOTEOL)))
- return CONTEXT_NEWLINE | context;
- else
- return context;
+ return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
+ : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
}
c = re_string_byte_at (input, idx);
if (IS_WORD_CHAR (c))
@@ -737,6 +867,7 @@ re_node_set_insert (set, elem)
if (set->nelem - idx > 0)
memcpy (new_array + idx + 1, set->elems + idx,
sizeof (int) * (set->nelem - idx));
+ re_free (set->elems);
set->elems = new_array;
}
else