diff options
Diffstat (limited to 'posix/regex.c')
-rw-r--r-- | posix/regex.c | 96 |
1 file changed, 83 insertions, 13 deletions
diff --git a/posix/regex.c b/posix/regex.c index b7c82f6..fc4db38 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -1,6 +1,6 @@ /* Extended regular expression matching and search library, version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for + (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. @@ -42,6 +42,13 @@ #include <sys/types.h> #endif +/* For platforms which support the ISO C amendment 1 functionality we + support user defined character classes. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +# include <wctype.h> +# include <wchar.h> +#endif + /* This is for other GNU distributions with internationalized messages. */ #if HAVE_LIBINTL_H || defined (_LIBC) # include <libintl.h> @@ -946,6 +953,12 @@ re_set_syntax (syntax) reg_syntax_t ret = re_syntax_options; re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ return ret; } @@ -1026,22 +1039,24 @@ static const char *re_error_msgid[] = #endif /* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. + exactly that if always used MAX_FAILURE_ITEMS items each time we failed. This is a variable only so users of regex can assign to it; we never change it ourselves. */ #ifdef INT_IS_16BIT #if defined (MATCH_MAY_ALLOCATE) -long re_max_failures = 4000; +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. 
*/ +long int re_max_failures = 4000; #else -long re_max_failures = 2000; +long int re_max_failures = 2000; #endif union fail_stack_elt { unsigned char *pointer; - long integer; + long int integer; }; typedef union fail_stack_elt fail_stack_elt_t; @@ -1049,8 +1064,8 @@ typedef union fail_stack_elt fail_stack_elt_t; typedef struct { fail_stack_elt_t *stack; - unsigned long size; - unsigned long avail; /* Offset of next open position. */ + unsigned long int size; + unsigned long int avail; /* Offset of next open position. */ } fail_stack_type; #else /* not INT_IS_16BIT */ @@ -1058,7 +1073,7 @@ typedef struct #if defined (MATCH_MAY_ALLOCATE) /* 4400 was enough to cause a crash on Alpha OSF/1, whose default stack limit is 2mb. */ -int re_max_failures = 4000; +int re_max_failures = 20000; #else int re_max_failures = 2000; #endif @@ -1661,15 +1676,29 @@ typedef struct } \ } -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendment 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementations might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# define IS_CHAR_CLASS(string) wctype (string) +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. 
*/ -#define IS_CHAR_CLASS(string) \ +# define IS_CHAR_CLASS(string) \ (STREQ (string, "alpha") || STREQ (string, "upper") \ || STREQ (string, "lower") || STREQ (string, "digit") \ || STREQ (string, "alnum") || STREQ (string, "xdigit") \ || STREQ (string, "space") || STREQ (string, "print") \ || STREQ (string, "punct") || STREQ (string, "graph") \ || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif #ifndef MATCH_MAY_ALLOCATE @@ -2147,6 +2176,34 @@ regex_compile (pattern, size, syntax, bufp) the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = wctype (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else int ch; boolean is_alnum = STREQ (str, "alnum"); boolean is_alpha = STREQ (str, "alpha"); @@ -2194,6 +2251,7 @@ regex_compile (pattern, size, syntax, bufp) SET_LIST_BIT (ch); } had_char_class = true; +#endif /* libc || wctype.h */ } else { @@ -3551,12 +3609,14 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ == Sword) +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 /* Test if the character before D and the one at D differ with respect to being word-constituent. */ #define AT_WORD_BOUNDARY(d) \ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - +#endif /* Free everything we malloc. 
*/ #ifdef MATCH_MAY_ALLOCATE @@ -4725,6 +4785,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) dummy_low_reg, dummy_high_reg, reg_dummy, reg_dummy, reg_info_dummy); } + /* Note fall through. */ unconditional_jump: #ifdef _LIBC @@ -5355,7 +5416,13 @@ re_compile_pattern (pattern, length, bufp) /* BSD has one and only one pattern buffer. */ static struct re_pattern_buffer re_comp_buf; -char * weak_function +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif re_comp (s) const char *s; { @@ -5396,7 +5463,10 @@ re_comp (s) } -int weak_function +int +#ifdef _LIBC +weak_function +#endif re_exec (s) const char *s; { |