From 8887a920a4b81a500f54893250085e0d1a52cf9a Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sat, 28 May 2011 17:14:30 -0400 Subject: Fix unnecessary overallocation due to incomplete character When incomplete characters are found at the end of a string the code ran amok and allocated lots of memory. Stricter limits are now in place. --- posix/Makefile | 2 +- posix/bug-regex32.c | 36 ++++++++++++++++++++++++++++++++++++ posix/regex_internal.c | 24 ++++++++++++++---------- posix/regexec.c | 10 ++++++---- 4 files changed, 57 insertions(+), 15 deletions(-) create mode 100644 posix/bug-regex32.c (limited to 'posix') diff --git a/posix/Makefile b/posix/Makefile index e89f21e..499d53d 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -82,7 +82,7 @@ tests := tstgetopt testfnm runtests runptests \ bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ bug-regex21 bug-regex22 bug-regex23 bug-regex24 \ bug-regex25 bug-regex26 bug-regex27 bug-regex28 \ - bug-regex29 bug-regex30 bug-regex31 \ + bug-regex29 bug-regex30 bug-regex31 bug-regex32 \ tst-nice tst-nanosleep tst-regex2 \ transbug tst-rxspencer tst-pcre tst-boost \ bug-ga1 tst-vfork1 tst-vfork2 tst-vfork3 tst-waitid \ diff --git a/posix/bug-regex32.c b/posix/bug-regex32.c new file mode 100644 index 0000000..525232c --- /dev/null +++ b/posix/bug-regex32.c @@ -0,0 +1,36 @@ +// BZ 12811 +#include +#include +#include + +static int +do_test (void) +{ + char buf[1000]; + regex_t preg; + if (setlocale (LC_CTYPE, "de_DE.UTF-8") == NULL) + { + puts ("setlocale failed"); + return 1; + } + + int e = regcomp (&preg, ".*ab", REG_ICASE); + if (e != 0) + { + regerror (e, &preg, buf, sizeof (buf)); + printf ("regcomp = %d \"%s\"\n", e, buf); + return 1; + } + + // Incomplete character at the end of the buffer + e = regexec (&preg, "aaaaaaaaaaaa\xc4", 0, NULL, 0); + + regfree (&preg); + regerror (e, &preg, buf, sizeof (buf)); + printf ("regexec = %d \"%s\"\n", e, buf); + + return e != REG_NOMATCH; +} + +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 8183a29..285ae3b 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -237,13 +237,8 @@ build_wcs_buffer (re_string_t *pstr) else p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) + if (BE (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0)) { /* We treat these cases as a singlebyte character. */ mbclen = 1; @@ -252,6 +247,12 @@ build_wcs_buffer (re_string_t *pstr) wc = pstr->trans[wc]; pstr->cur_state = prev_st; } + else if (BE (mbclen == (size_t) -2, 0)) + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; @@ -334,9 +335,11 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { - /* It is an invalid character or '\0'. Just use the byte. */ + /* It is an invalid character, an incomplete character + at the end of the string, or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; pstr->mbs[byte_idx] = ch; /* And also cast it to wide char. */ @@ -449,7 +452,8 @@ build_wcs_upper_buffer (re_string_t *pstr) for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) pstr->wcs[byte_idx++] = WEOF; } - else if (mbclen == (size_t) -1 || mbclen == 0) + else if (mbclen == (size_t) -1 || mbclen == 0 + || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len)) { /* It is an invalid character or '\0'. Just use the byte. */ int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; diff --git a/posix/regexec.c b/posix/regexec.c index 8d4475c..9e0c565 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1,5 +1,5 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2005, 2007, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 2002-2005,2007,2009,2010,2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa . @@ -1156,7 +1156,8 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, re_dfastate_t *old_state = cur_state; int next_char_idx = re_string_cur_idx (&mctx->input) + 1; - if (BE (next_char_idx >= mctx->input.bufs_len, 0) + if ((BE (next_char_idx >= mctx->input.bufs_len, 0) + && mctx->input.bufs_len < mctx->input.len) || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { @@ -1732,7 +1733,8 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) { int top = mctx->state_log_top; - if (next_state_log_idx >= mctx->input.bufs_len + if ((next_state_log_idx >= mctx->input.bufs_len + && mctx->input.bufs_len < mctx->input.len) || (next_state_log_idx >= mctx->input.valid_len && mctx->input.valid_len < mctx->input.len)) { @@ -4111,7 +4113,7 @@ extend_buffers (re_match_context_t *mctx) return REG_ESPACE; /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2)); if (BE (ret != REG_NOERROR, 0)) return ret; -- cgit v1.1