From c0a0f9a32c8baa6ab93d00eb42d92c02e9e146d7 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 24 May 2002 08:49:00 +0000 Subject: Update. 2002-05-21 Isamu Hasegawa * posix/regex.c: Define `inline' as a macro into nothing for the compilers which lack the keyword. * posix/regex.h: (RE_SYNTAX_GNU_AWK): Remove RE_CONTEXT_INVALID_OPS for the compatibility of gawk. * posix/regcomp.c: Add fake implementation of isblank() for the environments which lack the function. Don't use free_charset() in case of non-i18n envs. (build_range_exp): Don't use i18n related code in case of non-i18n envs. (build_collating_symbol): Likewise. (build_equiv_class): Likewise. (build_charclass): Likewise. (re_compile_fastmap_iter): Likewise. (parse_bracket_exp): Likewise. (build_word_op): Likewise. (regfree): Don't use free_charset() in case of non-i18n envs. * posix/regex_internal.h: Remove COMPLEX_BRACKET from re_token_type_t in case of non-i18n envs. Don't define re_charset_t in case of non-i18n envs. Change the type of wcs of re_string_t from wchar_t to wint_t, since we store also WEOF. * posix/regex_internal.c: (re_string_realloc_buffers): Change the type of wcs of re_string_t from wchar_t to wint_t. (re_string_reconstruct): Likewise. (create_ci_newstate): Don't use i18n related code in case of non-i18n envs. (create_cd_newstate): Likewise. 2002-05-24 Ulrich Drepper * iconv/loop.c: Fix typo. 2002-05-23 Jakub Jelinek * inet/ether_line.c (ether_line): Fix a typo causing only lower 4 bits of each ethernet address byte being assigned. Don't modify what line points to. * inet/tst-ether_aton.c (main): Add ether_line tests. 2002-05-23 Marcus Brinkmann * manual/filesys.texi: Don't make readlink example leak memory when readlink fails. --- ChangeLog | 46 +++++++ iconv/loop.c | 4 +- inet/ether_line.c | 10 +- inet/tst-ether_aton.c | 62 ++++++--- linuxthreads/ChangeLog | 4 + linuxthreads/sysdeps/ia64/pt-machine.h | 4 +- manual/filesys.texi | 9 +- posix/regcomp.c | 239 ++++++++++++++++++++++++++------- posix/regex.c | 1 + posix/regex.h | 3 +- posix/regex_internal.c | 10 +- posix/regex_internal.h | 22 +-- 12 files changed, 324 insertions(+), 90 deletions(-) diff --git a/ChangeLog b/ChangeLog index a691f78..471f60f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,49 @@ +2002-05-21 Isamu Hasegawa + + * posix/regex.c: Define `inline' as a macro into nothing for the + compilers which lack the keyword. + * posix/regex.h: (RE_SYNTAX_GNU_AWK): Remove RE_CONTEXT_INVALID_OPS + for the compatibility of gawk. + * posix/regcomp.c: Add fake implementation of isblank() for the + environments which lack the function. + Don't use free_charset() in case of non-i18n envs. + (build_range_exp): Don't use i18n related code in case of non-i18n + envs. + (build_collating_symbol): Likewise. + (build_equiv_class): Likewise. + (build_charclass): Likewise. + (re_compile_fastmap_iter): Likewise. + (parse_bracket_exp): Likewise. + (build_word_op): Likewise. + (regfree): Don't use free_charset() in case of non-i18n envs. + * posix/regex_internal.h: Remove COMPLEX_BRACKET from + re_token_type_t in case of non-i18n envs. + Don't define re_charset_t in case of non-i18n envs. + Change the type of wcs of re_string_t from wchar_t to wint_t, + since we store also WEOF. + * posix/regex_internal.c: (re_string_realloc_buffers): Change + the type of wcs of re_string_t from wchar_t to wint_t. + (re_string_reconstruct): Likewise. + (create_ci_newstate): Don't use i18n related code in case of + non-i18n envs. + (create_cd_newstate): Likewise. + +2002-05-24 Ulrich Drepper + + * iconv/loop.c: Fix typo. + +2002-05-23 Jakub Jelinek + + * inet/ether_line.c (ether_line): Fix a typo causing only + lower 4 bits of each ethernet address byte being assigned. + Don't modify what line points to. + * inet/tst-ether_aton.c (main): Add ether_line tests. + +2002-05-23 Marcus Brinkmann + + * manual/filesys.texi: Don't make readlink example leak memory + when readlink fails. + 2002-05-20 Ulrich Drepper * iconv/loop.c: Fix condition for defining unaligned loop. Add diff --git a/iconv/loop.c b/iconv/loop.c index 3609c19..0b1bce7 100644 --- a/iconv/loop.c +++ b/iconv/loop.c @@ -145,8 +145,8 @@ /* We produce at least one byte in the next round. */ #ifndef MIN_NEEDED_OUTPUT # error "MIN_NEEDED_OUTPUT definition missing" -#elif MIN_NEEDED_INPUT < 1 -# error "MIN_NEEDED_INPUT must be >= 1" +#elif MIN_NEEDED_OUTPUT < 1 +# error "MIN_NEEDED_OUTPUT must be >= 1" #endif /* Let's see how many bytes we produce. */ diff --git a/inet/ether_line.c b/inet/ether_line.c index d9ad918..7e871a6 100644 --- a/inet/ether_line.c +++ b/inet/ether_line.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1996, 1999 Free Software Foundation, Inc. +/* Copyright (C) 1996, 1999, 2002 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -46,7 +46,7 @@ ether_line (const char *line, struct ether_addr *addr, char *hostname) if ((ch < '0' || ch > '9') && (ch < 'a' || ch > 'f')) return -1; number <<= 4; - number = isdigit (ch) ? (ch - '0') : (ch - 'a' + 10); + number += isdigit (ch) ? (ch - '0') : (ch - 'a' + 10); ch = *line; if (cnt < 5 && ch != ':') @@ -65,15 +65,15 @@ ether_line (const char *line, struct ether_addr *addr, char *hostname) cp = __strchrnul (line, '#'); while (cp > line && isspace (cp[-1])) --cp; - *cp = '\0'; - if (*line == '\0') + if (cp == line) /* No hostname. */ return -1; /* XXX This can cause trouble because the hostname might be too long but we have no possibility to check it here. */ - strcpy (hostname, line); + memcpy (hostname, line, cp - line); + hostname [cp - line] = '\0'; return 0; } diff --git a/inet/tst-ether_aton.c b/inet/tst-ether_aton.c index 24718f4..94ceec8 100644 --- a/inet/tst-ether_aton.c +++ b/inet/tst-ether_aton.c @@ -1,29 +1,57 @@ #include +#include #include int main (int argc, char *argv[]) { - struct ether_addr *val; - int result; + struct ether_addr *valp, val; + int result, r; + char hostname[32], buf[64], *p; - val = ether_aton ("12:34:56:78:9a:bc"); + valp = ether_aton ("12:34:56:78:9a:bc"); printf ("ether_aton (\"12:34:56:78:9a:bc\") = %hhx:%hhx:%hhx:%hhx:%hhx:%hhx\n", - val->ether_addr_octet[0], - val->ether_addr_octet[1], - val->ether_addr_octet[2], - val->ether_addr_octet[3], - val->ether_addr_octet[4], - val->ether_addr_octet[5]); - - - result = (val->ether_addr_octet[0] != 0x12 - || val->ether_addr_octet[1] != 0x34 - || val->ether_addr_octet[2] != 0x56 - || val->ether_addr_octet[3] != 0x78 - || val->ether_addr_octet[4] != 0x9a - || val->ether_addr_octet[5] != 0xbc); + valp->ether_addr_octet[0], + valp->ether_addr_octet[1], + valp->ether_addr_octet[2], + valp->ether_addr_octet[3], + valp->ether_addr_octet[4], + valp->ether_addr_octet[5]); + + result = (valp->ether_addr_octet[0] != 0x12 + || valp->ether_addr_octet[1] != 0x34 + || valp->ether_addr_octet[2] != 0x56 + || valp->ether_addr_octet[3] != 0x78 + || valp->ether_addr_octet[4] != 0x9a + || valp->ether_addr_octet[5] != 0xbc); + + if ((r = ether_line ("0:c0:f0:46:5f:97 host.ether.com \t# comment", + &val, hostname)) == 0) + { + ether_ntoa_r (&val, buf); + p = strchr (buf, '\0'); + *p++ = ' '; + strcpy (p, hostname); + + printf ("ether_line (\"0:c0:f0:46:5f:97 host.ether.com\") = \"%s\"\n", + buf); + + result |= strcmp ("0:c0:f0:46:5f:97 host.ether.com", buf) != 0; + } + else + { + printf ("ether_line (\"0:c0:f0:46:5f:97 host.ether.com\") = %d\n", r); + result |= 1; + } + + r = ether_line ("0:c0:2:d0 foo.bar ", &val, hostname); + printf ("ether_line (\"0:c0:2:d0 foo.bar \") = %d\n", r); + result |= r != -1; + + r = ether_line ("0:c0:2:d0:1a:2a ", &val, hostname); + printf ("ether_line (\"0:c0:2:d0:1a:2a \") = %d\n", r); + result |= r != -1; return result; } diff --git a/linuxthreads/ChangeLog b/linuxthreads/ChangeLog index 2e65b83..b4ed82d 100644 --- a/linuxthreads/ChangeLog +++ b/linuxthreads/ChangeLog @@ -1,3 +1,7 @@ +2002-05-24 Ulrich Drepper + + * sysdeps/ia64/pt-machine.h (MEMORY_BARRIER): Use __sync_synchronize. + 2002-05-21 Ulrich Drepper * sysdeps/pthread/pthread.h (pthread_create): Rename first diff --git a/linuxthreads/sysdeps/ia64/pt-machine.h b/linuxthreads/sysdeps/ia64/pt-machine.h index 3d35533..8adead3 100644 --- a/linuxthreads/sysdeps/ia64/pt-machine.h +++ b/linuxthreads/sysdeps/ia64/pt-machine.h @@ -21,6 +21,8 @@ #ifndef _PT_MACHINE_H #define _PT_MACHINE_H 1 +#include + #ifndef PT_EI # define PT_EI extern inline #endif @@ -69,7 +71,7 @@ register struct _pthread_descr_struct *__thread_self __asm__("r13"); /* Memory barrier */ -#define MEMORY_BARRIER() __asm__ __volatile__("mf" : : : "memory") +#define MEMORY_BARRIER() __sync_syncronize () #define HAS_COMPARE_AND_SWAP_WITH_RELEASE_SEMANTICS diff --git a/manual/filesys.texi b/manual/filesys.texi index e715ec7..a74f32d 100644 --- a/manual/filesys.texi +++ b/manual/filesys.texi @@ -1141,16 +1141,19 @@ char * readlink_malloc (const char *filename) @{ int size = 100; + char *buffer = NULL; while (1) @{ - char *buffer = (char *) xmalloc (size); + buffer = (char *) xrealloc (buffer, size); int nchars = readlink (filename, buffer, size); if (nchars < 0) - return NULL; + @{ + free (buffer); + return NULL; + @} if (nchars < size) return buffer; - free (buffer); size *= 2; @} @} diff --git a/posix/regcomp.c b/posix/regcomp.c index 59836b1..b9b0560 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -28,6 +28,11 @@ #include #include +/* In case that the system doesn't have isblank(). */ +#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank +# define isblank(ch) ((ch) == ' ' || (ch) == '\t') +#endif + #ifdef _LIBC # ifndef _RE_DEFINE_LOCALE_FUNCTIONS # define _RE_DEFINE_LOCALE_FUNCTIONS 1 @@ -65,7 +70,9 @@ static void re_compile_fastmap_iter (regex_t *bufp, char *fastmap); static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); static reg_errcode_t init_word_char (re_dfa_t *dfa); +#ifdef RE_ENABLE_I18N static void free_charset (re_charset_t *cset); +#endif /* RE_ENABLE_I18N */ static void free_workarea_compile (regex_t *preg); static reg_errcode_t create_initial_state (re_dfa_t *dfa); static reg_errcode_t analyze (re_dfa_t *dfa); @@ -115,24 +122,40 @@ static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, re_string_t *regexp, re_token_t *token); #ifndef _LIBC -static reg_errcode_t build_range_exp (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, int *range_alloc, +# ifdef RE_ENABLE_I18N +static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *range_alloc, bracket_elem_t *start_elem, bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *coll_sym_alloc, unsigned char *name); +# else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, + bracket_elem_t *start_elem, + bracket_elem_t *end_elem); +static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, + unsigned char *name); +# endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ -static reg_errcode_t build_equiv_class (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +#ifdef RE_ENABLE_I18N +static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *equiv_class_alloc, const unsigned char *name); -static reg_errcode_t build_charclass (re_charset_t *mbcset, - re_bitset_ptr_t sbcset, +static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, + re_charset_t *mbcset, int *char_class_alloc, const unsigned char *class_name, reg_syntax_t syntax); +#else /* not RE_ENABLE_I18N */ +static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, + const unsigned char *name); +static reg_errcode_t build_charclass (re_bitset_ptr_t sbcset, + const unsigned char *class_name, + reg_syntax_t syntax); +#endif /* not RE_ENABLE_I18N */ static bin_tree_t *build_word_op (re_dfa_t *dfa, int not, reg_errcode_t *err); static void free_bin_tree (bin_tree_t *tree); static bin_tree_t *create_tree (bin_tree_t *left, bin_tree_t *right, @@ -340,6 +363,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) fastmap[ch] = 1; } +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) { int i; @@ -347,7 +371,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes || cset->nranges || cset->nchar_classes) { -#ifdef _LIBC +# ifdef _LIBC if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) { /* In this case we want to catch the bytes which are @@ -364,14 +388,12 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) if (table[ch] < 0) fastmap[ch] = 1; } -#else -# ifdef RE_ENABLE_I18N +# else if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) fastmap[i] = 1; -# endif /* RE_ENABLE_I18N */ -#endif /* not _LIBC */ +# endif /* not _LIBC */ } for (i = 0; i < cset->nmbchars; ++i) { @@ -380,8 +402,12 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) fastmap[buf[0]] = 1; } } - else if (type == END_OF_RE || type == COMPLEX_BRACKET - || type == OP_PERIOD) +#endif /* RE_ENABLE_I18N */ + else if (type == END_OF_RE || type == OP_PERIOD +#ifdef RE_ENABLE_I18N + || type == COMPLEX_BRACKET +#endif /* RE_ENABLE_I18N */ + ) { memset (fastmap, '\1', sizeof (char) * SBC_MAX); if (type == END_OF_RE) @@ -550,9 +576,12 @@ regfree (preg) for (i = 0; i < dfa->nodes_len; ++i) { re_token_t *node = dfa->nodes + i; +#ifdef RE_ENABLE_I18N if (node->type == COMPLEX_BRACKET && node->duplicated == 0) free_charset (node->opr.mbcset); - else if (node->type == SIMPLE_BRACKET && node->duplicated == 0) + else +#endif /* RE_ENABLE_I18N */ + if (node->type == SIMPLE_BRACKET && node->duplicated == 0) re_free (node->opr.sbcset); else if (node->type == OP_CONTEXT_NODE) { @@ -993,7 +1022,9 @@ calc_first (dfa, node) case OP_PERIOD: case OP_DUP_ASTERISK: case OP_DUP_QUESTION: +#ifdef RE_ENABLE_I18N case COMPLEX_BRACKET: +#endif /* RE_ENABLE_I18N */ case SIMPLE_BRACKET: case OP_BACK_REF: case ANCHOR: @@ -2233,10 +2264,14 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err) update it. */ static reg_errcode_t -build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) +# ifdef RE_ENABLE_I18N +build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *range_alloc; +# else /* not RE_ENABLE_I18N */ +build_range_exp (sbcset, start_elem, end_elem) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int start_ch, end_ch; @@ -2338,10 +2373,14 @@ build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) pointer argument since we may update it. */ static reg_errcode_t -build_collating_symbol (mbcset, sbcset, coll_sym_alloc, name) +# ifdef RE_ENABLE_I18N +build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *coll_sym_alloc; +# else /* not RE_ENABLE_I18N */ +build_collating_symbol (sbcset, name) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; unsigned char *name; { if (BE (strlen (name) != 1, 0)) @@ -2476,16 +2515,21 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) update it. */ static inline reg_errcode_t - build_range_exp (mbcset, sbcset, range_alloc, start_elem, end_elem) +# ifdef RE_ENABLE_I18N + build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *range_alloc; +# else /* not RE_ENABLE_I18N */ + build_range_exp (sbcset, start_elem, end_elem) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int ch; uint32_t start_collseq; uint32_t end_collseq; +# ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*range_alloc == mbcset->nranges) { @@ -2510,6 +2554,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) mbcset->range_ends = new_array_end; *range_alloc = new_nranges; } +# endif /* RE_ENABLE_I18N */ /* Equivalence Classes and Character Classes can't be a range start/end. */ @@ -2526,9 +2571,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) return REG_ERANGE; +# ifdef RE_ENABLE_I18N /* Got valid collation sequence values, add them as a new entry. */ mbcset->range_starts[mbcset->nranges] = start_collseq; mbcset->range_ends[mbcset->nranges++] = end_collseq; +# endif /* RE_ENABLE_I18N */ /* Build the table for single byte characters. */ for (ch = 0; ch <= SBC_MAX; ch++) @@ -2554,10 +2601,14 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) pointer argument sinse we may update it. */ static inline reg_errcode_t - build_collating_symbol (mbcset, sbcset, coll_sym_alloc, name) +# ifdef RE_ENABLE_I18N + build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *coll_sym_alloc; +# else /* not RE_ENABLE_I18N */ + build_collating_symbol (sbcset, name) +# endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; unsigned char *name; { int32_t elem, idx; @@ -2581,6 +2632,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) else return REG_ECOLLATE; +# ifdef RE_ENABLE_I18N /* Got valid collation sequence, add it as a new entry. */ /* Check the space of the arrays. */ if (*coll_sym_alloc == mbcset->ncoll_syms) @@ -2596,6 +2648,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) return REG_ESPACE; } mbcset->coll_syms[mbcset->ncoll_syms++] = idx; +# endif /* RE_ENABLE_I18N */ return REG_NOERROR; } else @@ -2613,11 +2666,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_token_t br_token; re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; - bin_tree_t *work_tree; - int token_len, new_idx; int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; int equiv_class_alloc = 0, char_class_alloc = 0; +#else /* not RE_ENABLE_I18N */ + int non_match = 0; +#endif /* not RE_ENABLE_I18N */ + bin_tree_t *work_tree; + int token_len, new_idx; #ifdef _LIBC collseqmb = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); @@ -2635,8 +2692,14 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } #endif sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); +#ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ +#ifdef RE_ENABLE_I18N if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else + if (BE (sbcset == NULL, 0)) +#endif /* RE_ENABLE_I18N */ { *err = REG_ESPACE; return NULL; @@ -2650,8 +2713,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } if (token->type == OP_NON_MATCH_LIST) { +#ifdef RE_ENABLE_I18N int i; mbcset->non_match = 1; +#else /* not RE_ENABLE_I18N */ + non_match = 1; +#endif /* not RE_ENABLE_I18N */ if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set (sbcset, '\0'); re_string_skip_bytes (regexp, token_len); /* Skip a token. */ @@ -2661,10 +2728,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *err = REG_BADPAT; goto parse_bracket_exp_free_return; } +#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) bitset_set (sbcset, i); +#endif /* RE_ENABLE_I18N */ } /* We treat the first ']' as a normal character. */ @@ -2731,8 +2800,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) *err = REG_BADPAT; goto parse_bracket_exp_free_return; } - *err = build_range_exp (mbcset, sbcset, &range_alloc, &start_elem, - &end_elem); + *err = build_range_exp (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &range_alloc, +#endif /* RE_ENABLE_I18N */ + &start_elem, &end_elem); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; } @@ -2743,6 +2815,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) case SB_CHAR: bitset_set (sbcset, start_elem.opr.ch); break; +#ifdef RE_ENABLE_I18N case MB_CHAR: /* Check whether the array has enough space. */ if (mbchar_alloc == mbcset->nmbchars) @@ -2758,20 +2831,30 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) } mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; break; +#endif /* RE_ENABLE_I18N */ case EQUIV_CLASS: - *err = build_equiv_class (mbcset, sbcset, &equiv_class_alloc, + *err = build_equiv_class (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &equiv_class_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; break; case COLL_SYM: - *err = build_collating_symbol (mbcset, sbcset, &coll_sym_alloc, + *err = build_collating_symbol (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &coll_sym_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name); if (BE (*err != REG_NOERROR, 0)) goto parse_bracket_exp_free_return; break; case CHAR_CLASS: - ret = build_charclass (mbcset, sbcset, &char_class_alloc, + ret = build_charclass (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &char_class_alloc, +#endif /* RE_ENABLE_I18N */ start_elem.opr.name, syntax); if (BE (ret != REG_NOERROR, 0)) goto parse_bracket_exp_espace; @@ -2788,7 +2871,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) re_string_skip_bytes (regexp, token_len); /* Skip a token. */ /* If it is non-matching list. */ +#ifdef RE_ENABLE_I18N if (mbcset->non_match) +#else /* not RE_ENABLE_I18N */ + if (non_match) +#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); /* Build a tree for simple bracket. */ @@ -2799,6 +2886,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) if (BE (new_idx == -1 || work_tree == NULL, 0)) goto parse_bracket_exp_espace; +#ifdef RE_ENABLE_I18N if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes || mbcset->nranges || (MB_CUR_MAX > 1 && (mbcset->nchar_classes || mbcset->non_match))) @@ -2825,12 +2913,17 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) free_charset (mbcset); return work_tree; } +#else /* not RE_ENABLE_I18N */ + return work_tree; +#endif /* not RE_ENABLE_I18N */ parse_bracket_exp_espace: *err = REG_ESPACE; parse_bracket_exp_free_return: re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ return NULL; } @@ -2915,13 +3008,17 @@ parse_bracket_symbol (elem, regexp, token) is a pointer argument sinse we may update it. */ static reg_errcode_t -build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) +#ifdef RE_ENABLE_I18N +build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *equiv_class_alloc; +#else /* not RE_ENABLE_I18N */ +build_equiv_class (sbcset, name) +#endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; const unsigned char *name; { -#ifdef _LIBC +#if defined _LIBC && defined RE_ENABLE_I18N uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); if (nrules != 0) { @@ -2987,7 +3084,7 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; } else -#endif +#endif /* _LIBC && RE_ENABLE_I18N */ { if (BE (strlen (name) != 1, 0)) return REG_ECOLLATE; @@ -3003,15 +3100,27 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name) is a pointer argument sinse we may update it. */ static reg_errcode_t -build_charclass (mbcset, sbcset, char_class_alloc, class_name, syntax) +#ifdef RE_ENABLE_I18N +build_charclass (sbcset, mbcset, char_class_alloc, class_name, syntax) re_charset_t *mbcset; - re_bitset_ptr_t sbcset; int *char_class_alloc; +#else /* not RE_ENABLE_I18N */ +build_charclass (sbcset, class_name, syntax) +#endif /* not RE_ENABLE_I18N */ + re_bitset_ptr_t sbcset; const unsigned char *class_name; reg_syntax_t syntax; { int i; const unsigned char *name = class_name; + + /* In case of REG_ICASE "upper" and "lower" match the both of + upper and lower cases. */ + if ((syntax & RE_ICASE) + && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) + name = "alpha"; + +#ifdef RE_ENABLE_I18N /* Check the space of the arrays. */ if (*char_class_alloc == mbcset->nchar_classes) { @@ -3024,14 +3133,8 @@ build_charclass (mbcset, sbcset, char_class_alloc, class_name, syntax) if (BE (mbcset->char_classes == NULL, 0)) return REG_ESPACE; } - - /* In case of REG_ICASE "upper" and "lower" match the both of - upper and lower cases. */ - if ((syntax & RE_ICASE) - && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) - name = "alpha"; - mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); +#endif /* RE_ENABLE_I18N */ #define BUILD_CHARCLASS_LOOP(ctype_func)\ for (i = 0; i < SBC_MAX; ++i) \ @@ -3077,15 +3180,27 @@ build_word_op (dfa, not, err) reg_errcode_t *err; { re_bitset_ptr_t sbcset; +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; + int alloc = 0; +#else /* not RE_ENABLE_I18N */ + int non_match = 0; +#endif /* not RE_ENABLE_I18N */ reg_errcode_t ret; re_token_t br_token; bin_tree_t *tree; - int new_idx, alloc = 0; + int new_idx; sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); +#ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); +#endif /* RE_ENABLE_I18N */ + +#ifdef RE_ENABLE_I18N if (BE (sbcset == NULL || mbcset == NULL, 0)) +#else /* not RE_ENABLE_I18N */ + if (BE (sbcset == NULL, 0)) +#endif /* not RE_ENABLE_I18N */ { *err = REG_ESPACE; return NULL; @@ -3093,26 +3208,35 @@ build_word_op (dfa, not, err) if (not) { +#ifdef RE_ENABLE_I18N int i; - mbcset->non_match = 1; /* if (syntax & RE_HAT_LISTS_NOT_NEWLINE) bitset_set(cset->sbcset, '\0'); */ -#ifdef RE_ENABLE_I18N + mbcset->non_match = 1; if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) bitset_set (sbcset, i); -#endif /* RE_ENABLE_I18N */ +#else /* not RE_ENABLE_I18N */ + non_match = 1; +#endif /* not RE_ENABLE_I18N */ } /* We don't care the syntax in this case. */ - ret = build_charclass (mbcset, sbcset, &alloc, "alpha", 0); + ret = build_charclass (sbcset, +#ifdef RE_ENABLE_I18N + mbcset, &alloc, +#endif /* RE_ENABLE_I18N */ + "alpha", 0); + if (BE (ret != REG_NOERROR, 0)) { re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ *err = REG_ESPACE; return NULL; } @@ -3120,7 +3244,11 @@ build_word_op (dfa, not, err) bitset_set (sbcset, '_'); /* If it is non-matching list. */ +#ifdef RE_ENABLE_I18N if (mbcset->non_match) +#else /* not RE_ENABLE_I18N */ + if (non_match) +#endif /* not RE_ENABLE_I18N */ bitset_not (sbcset); /* Build a tree for simple bracket. */ @@ -3131,6 +3259,7 @@ build_word_op (dfa, not, err) if (BE (new_idx == -1 || tree == NULL, 0)) goto build_word_op_espace; +#ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { re_token_t alt_token; @@ -3155,9 +3284,15 @@ build_word_op (dfa, not, err) free_charset (mbcset); return tree; } +#else /* not RE_ENABLE_I18N */ + return tree; +#endif /* not RE_ENABLE_I18N */ + build_word_op_espace: re_free (sbcset); +#ifdef RE_ENABLE_I18N free_charset (mbcset); +#endif /* RE_ENABLE_I18N */ *err = REG_ESPACE; return NULL; } @@ -3190,19 +3325,21 @@ fetch_number (input, token, syntax) return num; } +#ifdef RE_ENABLE_I18N static void free_charset (re_charset_t *cset) { re_free (cset->mbchars); -#ifdef _LIBC +# ifdef _LIBC re_free (cset->coll_syms); re_free (cset->equiv_classes); re_free (cset->range_starts); re_free (cset->range_ends); -#endif +# endif re_free (cset->char_classes); re_free (cset); } +#endif /* RE_ENABLE_I18N */ /* Functions for binary tree operation. */ diff --git a/posix/regex.c b/posix/regex.c index 0fe38d3..1d5dd0f 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -45,6 +45,7 @@ # define BE(expr, val) __builtin_expect (expr, val) #else # define BE(expr, val) (expr) +# define inline #endif #include "regcomp.c" diff --git a/posix/regex.h b/posix/regex.h index d1e4b68..fac441d 100644 --- a/posix/regex.h +++ b/posix/regex.h @@ -189,7 +189,8 @@ extern reg_syntax_t re_syntax_options; #define RE_SYNTAX_GNU_AWK \ ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ + | RE_CONTEXT_INVALID_OPS )) #define RE_SYNTAX_POSIX_AWK \ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 5327c26..2809c45 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -169,7 +169,7 @@ re_string_realloc_buffers (pstr, new_buf_len) #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { - pstr->wcs = re_realloc (pstr->wcs, wchar_t, new_buf_len); + pstr->wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); if (BE (pstr->wcs == NULL, 0)) return REG_ESPACE; } @@ -436,7 +436,7 @@ re_string_reconstruct (pstr, idx, eflags, newline) #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wchar_t)); + (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) memmove (pstr->mbs, pstr->mbs + offset, @@ -1166,9 +1166,11 @@ create_ci_newstate (dfa, nodes, hash) /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) newstate->halt = 1; +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR || OP_CONTEXT_NODE) @@ -1213,9 +1215,11 @@ create_cd_newstate (dfa, nodes, context, hash) /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) newstate->halt = 1; +#ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) newstate->has_backref = 1; else if (type == ANCHOR) @@ -1228,9 +1232,11 @@ create_cd_newstate (dfa, nodes, context, hash) newstate->halt = 1; else if (ctype == OP_BACK_REF) newstate->has_backref = 1; +#ifdef RE_ENABLE_I18N else if (ctype == COMPLEX_BRACKET || (type == OP_PERIOD && MB_CUR_MAX > 1)) newstate->accept_mb = 1; +#endif /* RE_ENABLE_I18N */ } if (constraint) diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 75cc815..95ae46e 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -119,7 +119,9 @@ typedef enum ALT, SUBEXP, SIMPLE_BRACKET, +#ifdef RE_ENABLE_I18N COMPLEX_BRACKET, +#endif /* RE_ENABLE_I18N */ /* Node type, These are used by token, node, tree. */ OP_PERIOD, @@ -137,6 +139,7 @@ typedef enum END_OF_RE_TOKEN_T } re_token_type_t; +#ifdef RE_ENABLE_I18N typedef struct { /* If this character set is the non-matching list. */ @@ -147,31 +150,32 @@ typedef struct int nmbchars; /* Collating symbols. */ -#ifdef _LIBC +# ifdef _LIBC int32_t *coll_syms; -#endif +# endif int ncoll_syms; /* Equivalence classes. */ -#ifdef _LIBC +# ifdef _LIBC int32_t *equiv_classes; -#endif +# endif int nequiv_classes; /* Range expressions. */ -#ifdef _LIBC +# ifdef _LIBC uint32_t *range_starts; uint32_t *range_ends; -#else /* not _LIBC */ +# else /* not _LIBC */ wchar_t *range_starts; wchar_t *range_ends; -#endif /* not _LIBC */ +# endif /* not _LIBC */ int nranges; /* Character classes. */ wctype_t *char_classes; int nchar_classes; } re_charset_t; +#endif /* RE_ENABLE_I18N */ typedef struct { @@ -180,7 +184,9 @@ typedef struct { unsigned char c; /* for CHARACTER */ re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ +#ifdef RE_ENABLE_I18N re_charset_t *mbcset; /* for COMPLEX_BRACKET */ +#endif /* RE_ENABLE_I18N */ int idx; /* for BACK_REF */ re_context_type ctx_type; /* for ANCHOR */ struct @@ -221,7 +227,7 @@ struct re_string_t unsigned char *mbs_case; #ifdef RE_ENABLE_I18N /* Store the wide character string which is corresponding to MBS. */ - wchar_t *wcs; + wint_t *wcs; mbstate_t cur_state; #endif /* The length of the valid characters in the buffers. */ -- cgit v1.1