From 51702635af8445431054291c86aaace7c30b0970 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 7 May 1997 15:31:21 +0000 Subject: Update. * locale/categories.def: Change type of _NL_CTYPE_CLASS_NAMES and _NL_CTYPE_MAP_NAMES field to stringlist. Change name of _NL_CTYPE_CODESET_NAME to "charmap". * locale/localeinfo.h (enum value_type): Add stringlist. * locale/programs/locale.c (show_info): Handle stringlist. * locale/programs/charmap.c (charmap_read): If charmap file is not using the given name try to find it by looking through all available charmap files and compare the code set name. * locale/programs/locale.c (write_charmaps): Also print names of charset in fields in the files. * elf/ldd.bash.in: Correct translatable strings. * posix/TESTS: Add some more tests for character class matching. * posix/regex.c: Merge with GNU awk version. (regex_compile): Use ISO C/amend 1 functions for character class handling. * posix/regex.h: Merge with GNU awk version. * posix/getopt.c: Declare as master copies. * posix/getopt1.c: Likewise. * posix/getopt.h: Likewise. * sysdeps/unix/sysv/linux/sys/mount.h: Add definitions for option value to mount functions. Patch by a sun . * stdio-common/bug4.c (main): Use /tmp/bug4.test for concurrency with other tests. * sunrpc/svc_run.c (svc_exit): New, defined. (svc_run): Test for svc_stop variable. * sysdeps/m68k/s_cexp.c: Rewritten. * sysdeps/m68k/s_cexpf.c: Likewise. * sysdeps/m68k/s_cexpl.c: Likewise. --- posix/regex.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 13 deletions(-) (limited to 'posix/regex.c') diff --git a/posix/regex.c b/posix/regex.c index b7c82f6..fc4db38 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -1,6 +1,6 @@ /* Extended regular expression matching and search library, version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for + (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. @@ -42,6 +42,13 @@ #include #endif +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +# include +# include +#endif + /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined (_LIBC) # include @@ -946,6 +953,12 @@ re_set_syntax (syntax) reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ return ret; } @@ -1026,22 +1039,24 @@ static const char *re_error_msgid[] = #endif /* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. + exactly that if always used MAX_FAILURE_ITEMS items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ #ifdef INT_IS_16BIT #if defined (MATCH_MAY_ALLOCATE) -long re_max_failures = 4000; +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +long int re_max_failures = 4000; #else -long re_max_failures = 2000; +long int re_max_failures = 2000; #endif union fail_stack_elt { unsigned char *pointer; - long integer; + long int integer; }; typedef union fail_stack_elt fail_stack_elt_t; @@ -1049,8 +1064,8 @@ typedef union fail_stack_elt fail_stack_elt_t; typedef struct { fail_stack_elt_t *stack; - unsigned long size; - unsigned long avail; /* Offset of next open position. */ + unsigned long int size; + unsigned long int avail; /* Offset of next open position. */ } fail_stack_type; #else /* not INT_IS_16BIT */ @@ -1058,7 +1073,7 @@ typedef struct #if defined (MATCH_MAY_ALLOCATE) /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ -int re_max_failures = 4000; +int re_max_failures = 20000; #else int re_max_failures = 2000; #endif @@ -1661,15 +1676,29 @@ typedef struct } \ } -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# define IS_CHAR_CLASS(string) wctype (string) +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ -#define IS_CHAR_CLASS(string) \ +# define IS_CHAR_CLASS(string) \ (STREQ (string, "alpha") || STREQ (string, "upper") \ || STREQ (string, "lower") || STREQ (string, "digit") \ || STREQ (string, "alnum") || STREQ (string, "xdigit") \ || STREQ (string, "space") || STREQ (string, "print") \ || STREQ (string, "punct") || STREQ (string, "graph") \ || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif #ifndef MATCH_MAY_ALLOCATE @@ -2147,6 +2176,34 @@ regex_compile (pattern, size, syntax, bufp) the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = wctype (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else int ch; boolean is_alnum = STREQ (str, "alnum"); boolean is_alpha = STREQ (str, "alpha"); @@ -2194,6 +2251,7 @@ regex_compile (pattern, size, syntax, bufp) SET_LIST_BIT (ch); } had_char_class = true; +#endif /* libc || wctype.h */ } else { @@ -3551,12 +3609,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ == Sword) +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 /* Test if the character before D and the one at D differ with respect to being word-constituent. */ #define AT_WORD_BOUNDARY(d) \ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - +#endif /* Free everything we malloc. */ #ifdef MATCH_MAY_ALLOCATE @@ -4725,6 +4785,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) dummy_low_reg, dummy_high_reg, reg_dummy, reg_dummy, reg_info_dummy); } + /* Note fall through. */ unconditional_jump: #ifdef _LIBC @@ -5355,7 +5416,13 @@ re_compile_pattern (pattern, length, bufp) /* BSD has one and only one pattern buffer. */ static struct re_pattern_buffer re_comp_buf; -char * weak_function +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif re_comp (s) const char *s; { @@ -5396,7 +5463,10 @@ re_comp (s) } -int weak_function +int +#ifdef _LIBC +weak_function +#endif re_exec (s) const char *s; { -- cgit v1.1