From 33e63e7993d50ca4a8409aefaccf247301c0e8f2 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 4 Jun 2006 04:59:36 +0000 Subject: * posix/regex_internal.c (re_string_skip_chars): If no character has been converted at all, set *last_wc to WEOF. If mbrtowc failed, set wc to the byte which couldn't be converted. (re_string_reconstruct): Don't clear valid_raw_len before calling re_string_skip_chars. If wc is WEOF after re_string_skip_chars, set tip_context using re_string_context_at. * posix/Makefile: Add rules to build and run bug-regex25 test. * posix/bug-regex25.c: New test. --- ChangeLog | 11 ++++++++++ posix/Makefile | 3 ++- posix/bug-regex25.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ posix/regex_internal.c | 26 +++++++++++++++-------- 4 files changed, 87 insertions(+), 10 deletions(-) create mode 100644 posix/bug-regex25.c diff --git a/ChangeLog b/ChangeLog index c299adc..6999e7d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2006-06-02 Jakub Jelinek + + * posix/regex_internal.c (re_string_skip_chars): If no character has + been converted at all, set *last_wc to WEOF. If mbrtowc failed, set wc + to the byte which couldn't be converted. + (re_string_reconstruct): Don't clear valid_raw_len before calling + re_string_skip_chars. If wc is WEOF after re_string_skip_chars, set + tip_context using re_string_context_at. + * posix/Makefile: Add rules to build and run bug-regex25 test. + * posix/bug-regex25.c: New test. + 2006-06-02 Ryan S. Arnold [BZ #2703] diff --git a/posix/Makefile b/posix/Makefile index 418e1cb..a9d9910 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -81,7 +81,7 @@ tests := tstgetopt testfnm runtests runptests \ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \ bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ bug-regex21 bug-regex22 bug-regex23 bug-regex24 \ - tst-nice tst-nanosleep tst-regex2 \ + bug-regex25 tst-nice tst-nanosleep tst-regex2 \ transbug tst-rxspencer tst-pcre tst-boost \ bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \ tst-getaddrinfo2 bug-glob1 bug-glob2 tst-sysconf \ @@ -188,6 +188,7 @@ bug-regex19-ENV = LOCPATH=$(common-objpfx)localedata bug-regex20-ENV = LOCPATH=$(common-objpfx)localedata bug-regex22-ENV = LOCPATH=$(common-objpfx)localedata bug-regex23-ENV = LOCPATH=$(common-objpfx)localedata +bug-regex25-ENV = LOCPATH=$(common-objpfx)localedata tst-rxspencer-ARGS = --utf8 rxspencer/tests tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata tst-pcre-ARGS = PCRE.tests diff --git a/posix/bug-regex25.c b/posix/bug-regex25.c new file mode 100644 index 0000000..5e56e49 --- /dev/null +++ b/posix/bug-regex25.c @@ -0,0 +1,57 @@ +/* Test re_search in multibyte locale other than UTF-8. + Copyright (C) 2006 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Jakub Jelinek , 2006. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#include +#include +#include +#include + +const char *str1 = "\xa3\xd8\xa3\xc9\xa3\xc9"; +const char *str2 = "\xa3\xd8\xa3\xc9"; + +int +main (void) +{ + setlocale (LC_ALL, "ja_JP.eucJP"); + + re_set_syntax (RE_SYNTAX_SED); + + struct re_pattern_buffer re; + memset (&re, 0, sizeof (re)); + + struct re_registers regs; + memset (®s, 0, sizeof (regs)); + + re_compile_pattern ("$", 1, &re); + + int ret = 0, r = re_search (&re, str1, 4, 0, 4, ®s); + if (r != 4) + { + printf ("First re_search returned %d\n", r); + ret = 1; + } + r = re_search (&re, str2, 4, 0, 4, ®s); + if (r != 4) + { + printf ("Second re_search returned %d\n", r); + ret = 1; + } + return ret; +} diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 855497e..ac312db 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -482,7 +482,7 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) mbstate_t prev_st; int rawbuf_idx; size_t mbclen; - wchar_t wc = 0; + wchar_t wc = WEOF; /* Skip the characters which are not necessary to check. */ for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; @@ -495,7 +495,11 @@ re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) { - /* We treat these cases as a singlebyte character. */ + /* We treat these cases as a single byte character. */ + if (mbclen == 0 || remain_len == 0) + wc = L'\0'; + else + wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx); mbclen = 1; pstr->cur_state = prev_st; } @@ -618,7 +622,6 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) } #endif pstr->valid_len = 0; - pstr->valid_raw_len = 0; #ifdef RE_ENABLE_I18N if (pstr->mb_cur_max > 1) { @@ -681,6 +684,16 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) if (wc == WEOF) pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; + if (wc == WEOF) + pstr->tip_context + = re_string_context_at (pstr, pstr->valid_raw_len - 1, eflags); + else + pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) + && IS_WIDE_WORD_CHAR (wc)) + ? CONTEXT_WORD + : ((IS_WIDE_NEWLINE (wc) + && pstr->newline_anchor) + ? CONTEXT_NEWLINE : 0)); if (BE (pstr->valid_len, 0)) { for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) @@ -689,17 +702,12 @@ re_string_reconstruct (re_string_t *pstr, int idx, int eflags) memset (pstr->mbs, 255, pstr->valid_len); } pstr->valid_raw_len = pstr->valid_len; - pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) - && IS_WIDE_WORD_CHAR (wc)) - ? CONTEXT_WORD - : ((IS_WIDE_NEWLINE (wc) - && pstr->newline_anchor) - ? CONTEXT_NEWLINE : 0)); } else #endif /* RE_ENABLE_I18N */ { int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; + pstr->valid_raw_len = 0; if (pstr->trans) c = pstr->trans[c]; pstr->tip_context = (bitset_contain (pstr->word_char, c) -- cgit v1.1